Documentation ¶
Overview ¶
Package tcp contains the implementation of the TCP transport protocol.
Index ¶
- Constants
- func FindWndScale(wnd seqnum.Size) int
- func GetTCPReceiveBufferLimits(s tcpip.StackHandler) tcpip.ReceiveBufferSizeOption
- func GetTCPSendBufferLimits(sh tcpip.StackHandler) tcpip.SendBufferSizeOption
- func NewProtocol(s *stack.Stack) stack.TransportProtocol
- func NewProtocolCUBIC(s *stack.Stack) stack.TransportProtocol
- func NewProtocolProbe(probe TCPProbeFunc) func(*stack.Stack) stack.TransportProtocol
- func TrimSACKBlockList(sack *SACKInfo, rcvNxt seqnum.Value)
- func UpdateSACKBlocks(sack *SACKInfo, segStart seqnum.Value, segEnd seqnum.Value, ...)
- type Endpoint
- func (e *Endpoint) Abort()
- func (e *Endpoint) Accept(peerAddr *tcpip.FullAddress) (tcpip.Endpoint, *waiter.Queue, tcpip.Error)
- func (e *Endpoint) AssertLockHeld(locked *Endpoint)
- func (e *Endpoint) Bind(addr tcpip.FullAddress) (err tcpip.Error)
- func (e *Endpoint) Close()
- func (e *Endpoint) Connect(addr tcpip.FullAddress) tcpip.Error
- func (*Endpoint) ConnectEndpoint(tcpip.Endpoint) tcpip.Error
- func (*Endpoint) Disconnect() tcpip.Error
- func (e *Endpoint) EndpointState() EndpointState
- func (e *Endpoint) GetAcceptConn() bool
- func (e *Endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
- func (e *Endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error)
- func (e *Endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error
- func (e *Endpoint) GetSockOptInt(opt tcpip.SockOptInt) (int, tcpip.Error)
- func (e *Endpoint) HandleError(transErr stack.TransportError, pkt *stack.PacketBuffer)
- func (*Endpoint) HandlePacket(stack.TransportEndpointID, *stack.PacketBuffer)
- func (e *Endpoint) HasNIC(id int32) bool
- func (e *Endpoint) Info() tcpip.EndpointInfo
- func (e *Endpoint) LastError() tcpip.Error
- func (e *Endpoint) LastErrorLocked() tcpip.Error
- func (e *Endpoint) Listen(backlog int) tcpip.Error
- func (e *Endpoint) LockUser()
- func (e *Endpoint) ModerateRecvBuf(copied int)
- func (e *Endpoint) OnCorkOptionSet(v bool)
- func (e *Endpoint) OnDelayOptionSet(v bool)
- func (e *Endpoint) OnKeepAliveSet(bool)
- func (e *Endpoint) OnReuseAddressSet(v bool)
- func (e *Endpoint) OnReusePortSet(v bool)
- func (e *Endpoint) OnSetReceiveBufferSize(rcvBufSz, oldSz int64) (newSz int64, postSet func())
- func (e *Endpoint) OnSetSendBufferSize(sz int64) int64
- func (e *Endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error)
- func (e *Endpoint) Readiness(mask waiter.EventMask) waiter.EventMask
- func (e *Endpoint) Restore(s *stack.Stack)
- func (e *Endpoint) Resume()
- func (e *Endpoint) ResumeWork()
- func (e *Endpoint) SetOwner(owner tcpip.PacketOwner)
- func (e *Endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error
- func (e *Endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error
- func (e *Endpoint) Shutdown(flags tcpip.ShutdownFlags) tcpip.Error
- func (e *Endpoint) SocketOptions() *tcpip.SocketOptions
- func (e *Endpoint) State() uint32
- func (e *Endpoint) Stats() tcpip.EndpointStats
- func (e *Endpoint) StopWork()
- func (e *Endpoint) TryLock() bool
- func (e *Endpoint) UnlockUser()
- func (e *Endpoint) UpdateLastError(err tcpip.Error)
- func (e *Endpoint) Wait()
- func (e *Endpoint) WakeupWriters()
- func (e *Endpoint) Write(p tcpip.Payloader, opts tcpip.WriteOptions) (int64, tcpip.Error)
- type EndpointState
- type Forwarder
- type ForwarderRequest
- type RcvBufAutoTuneParams
- type ReceiveErrors
- type SACKInfo
- type SACKScoreboard
- func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, maxSACKED seqnum.Value)
- func (s *SACKScoreboard) Delete(seq seqnum.Value)
- func (s *SACKScoreboard) Empty() bool
- func (s *SACKScoreboard) Insert(r header.SACKBlock)
- func (s *SACKScoreboard) IsLost(seq seqnum.Value) bool
- func (s *SACKScoreboard) IsRangeLost(r header.SACKBlock) bool
- func (s *SACKScoreboard) IsSACKED(r header.SACKBlock) bool
- func (s *SACKScoreboard) MaxSACKED() seqnum.Value
- func (s *SACKScoreboard) Reset()
- func (s *SACKScoreboard) SMSS() uint16
- func (s *SACKScoreboard) Sacked() seqnum.Size
- func (s *SACKScoreboard) String() string
- type SendErrors
- type Stats
- type TCPCubicState
- type TCPEndpointID
- type TCPEndpointState
- type TCPEndpointStateInner
- type TCPFastRecoveryState
- type TCPProbeFunc
- type TCPRACKState
- type TCPRTTState
- type TCPRcvBufState
- type TCPReceiverState
- type TCPSACKInfo
- type TCPSenderState
- type TCPSndBufState
Constants ¶
const ( // ProtocolNumber is the tcp protocol number. ProtocolNumber = header.TCPProtocolNumber // MinBufferSize is the smallest size of a receive or send buffer. MinBufferSize = 4 << 10 // 4096 bytes. // DefaultSendBufferSize is the default size of the send buffer for // an endpoint. DefaultSendBufferSize = 1 << 20 // 1MB // DefaultReceiveBufferSize is the default size of the receive buffer // for an endpoint. DefaultReceiveBufferSize = 1 << 20 // 1MB // MaxBufferSize is the largest size a receive/send buffer can grow to. MaxBufferSize = 4 << 20 // 4MB // DefaultTCPLingerTimeout is the amount of time that sockets linger in // FIN_WAIT_2 state before being marked closed. DefaultTCPLingerTimeout = 60 * time.Second // MaxTCPLingerTimeout is the maximum amount of time that sockets // linger in FIN_WAIT_2 state before being marked closed. MaxTCPLingerTimeout = 120 * time.Second // DefaultTCPTimeWaitTimeout is the amount of time that sockets linger // in TIME_WAIT state before being marked closed. DefaultTCPTimeWaitTimeout = 60 * time.Second // DefaultSynRetries is the default value for the number of SYN retransmits // before a connect is aborted. DefaultSynRetries = 6 // DefaultKeepaliveIdle is the idle time for a connection before keep-alive // probes are sent. DefaultKeepaliveIdle = 2 * time.Hour // DefaultKeepaliveInterval is the time between two successive keep-alive // probes. DefaultKeepaliveInterval = 75 * time.Second // DefaultKeepaliveCount is the number of keep-alive probes that are sent // before declaring the connection dead. DefaultKeepaliveCount = 9 )
const ( // MinRTO is the minimum allowed value for the retransmit timeout. MinRTO = 200 * time.Millisecond // MaxRTO is the maximum allowed value for the retransmit timeout. MaxRTO = 120 * time.Second // MinSRTT is the minimum allowed value for smoothed RTT. MinSRTT = 1 * time.Millisecond // InitialCwnd is the initial congestion window. InitialCwnd = 10 // MaxRetries is the maximum number of probe retries sender does // before timing out the connection. // Linux default TCP_RETR2, net.ipv4.tcp_retries2. MaxRetries = 15 // InitialSsthresh is the maximum int value, which depends on the // platform. InitialSsthresh = math.MaxInt )
const ( // InitialRTO is the initial retransmission timeout. // https://github.com/torvalds/linux/blob/7c636d4d20f/include/net/tcp.h#L142 InitialRTO = time.Second )
const ( // MaxSACKBlocks is the maximum number of SACK blocks stored // at receiver side. MaxSACKBlocks = 6 )
const ( // SegOverheadFactor is used to multiply the value provided by the // user on a SetSockOpt for setting the socket send/receive buffer sizes. SegOverheadFactor = 2 )
const ( // SegOverheadSize is the size of an empty seg in memory including packet // buffer overhead. It is advised to use SegOverheadSize instead of segSize // in all cases where accounting for segment memory overhead is important. SegOverheadSize = segSize + stack.PacketBufferStructSize + header.IPv4MaximumHeaderSize )
Variables ¶
This section is empty.
Functions ¶
func FindWndScale ¶
FindWndScale determines the window scale to use for the given maximum window size.
func GetTCPReceiveBufferLimits ¶
func GetTCPReceiveBufferLimits(s tcpip.StackHandler) tcpip.ReceiveBufferSizeOption
GetTCPReceiveBufferLimits is used to get receive buffer size limits for TCP.
func GetTCPSendBufferLimits ¶
func GetTCPSendBufferLimits(sh tcpip.StackHandler) tcpip.SendBufferSizeOption
GetTCPSendBufferLimits is used to get send buffer size limits for TCP.
func NewProtocol ¶
func NewProtocol(s *stack.Stack) stack.TransportProtocol
NewProtocol returns a TCP transport protocol with Reno congestion control.
func NewProtocolCUBIC ¶
func NewProtocolCUBIC(s *stack.Stack) stack.TransportProtocol
NewProtocolCUBIC returns a TCP transport protocol with CUBIC congestion control.
TODO(b/345835636): Remove this and make CUBIC the default across the board.
func NewProtocolProbe ¶
func NewProtocolProbe(probe TCPProbeFunc) func(*stack.Stack) stack.TransportProtocol
NewProtocolProbe returns a TCP transport protocol with Reno congestion control and the given probe.
The probe will be invoked on every segment received by TCP endpoints. The probe function is passed a copy of the TCP endpoint state before and after processing of the segment.
func TrimSACKBlockList ¶
TrimSACKBlockList updates the sack block list by removing/modifying any block where start is < rcvNxt.
func UpdateSACKBlocks ¶
func UpdateSACKBlocks(sack *SACKInfo, segStart seqnum.Value, segEnd seqnum.Value, rcvNxt seqnum.Value)
UpdateSACKBlocks updates the list of SACK blocks to include the segment specified by segStart->segEnd. If the segment happens to be an out of order delivery then the first block in the sack.blocks always includes the segment identified by segStart->segEnd.
Types ¶
type Endpoint ¶
type Endpoint struct { TCPEndpointStateInner stack.TransportEndpointInfo tcpip.DefaultSocketOptionsHandler // +checklocks:rcvQueueMu TCPRcvBufState // contains filtered or unexported fields }
Endpoint represents a TCP endpoint. This struct serves as the interface between users of the endpoint and the protocol implementation; it is legal to have concurrent goroutines make calls into the endpoint, they are properly synchronized. The protocol implementation, however, runs in a single goroutine.
Each endpoint has a few mutexes:
e.mu -> Primary mutex for an endpoint must be held for all operations except in e.Readiness where acquiring it will result in a deadlock in epoll implementation.
The following four mutexes can be acquired independent of e.mu but if acquired with e.mu then e.mu must be acquired first.
e.acceptMu -> Protects e.acceptQueue. e.rcvQueueMu -> Protects e.rcvQueue's associated fields but not e.rcvQueue itself. e.sndQueueMu -> Protects the e.sndQueue and associated fields. e.lastErrorMu -> Protects the lastError field.
LOCKING/UNLOCKING of the endpoint. The locking of an endpoint is different based on the context in which the lock is acquired. In the syscall context e.LockUser/e.UnlockUser should be used and when doing background processing e.mu.Lock/e.mu.Unlock should be used. The distinction is described below in brief.
The reason for this locking behaviour is to avoid wakeups to handle packets. In cases where the endpoint is already locked the background processor can queue the packet up and go its merry way and the lock owner will eventually process the backlog when releasing the lock. Similarly when acquiring the lock from say a syscall goroutine we can implement a bit of spinning if we know that the lock is not held by another syscall goroutine. Background processors should never hold the lock for long and we can avoid an expensive sleep/wakeup by spinning for a short while.
For more details please see the detailed documentation on e.LockUser/e.UnlockUser methods.
TODO(b/339664055): Checklocks should be used more extensively here. Coverage is currently sparse.
+stateify savable
func (*Endpoint) Accept ¶
Accept returns a new endpoint if a peer has established a connection to an endpoint previously set to listen mode.
peerAddr, if non-nil, will contain the peer address of the returned endpoint.
func (*Endpoint) AssertLockHeld ¶
AssertLockHeld forces the checklocks analyzer to consider e.mu held. This is used in places where we know that e.mu is held, but checklocks does not, which can happen when creating new locked objects. You must pass the known locked endpoint to this function and it must be the same as the caller endpoint. TODO(b/226403629): Remove this function once checklocks understands local variable locks. +checklocks:locked.mu +checklocksacquire:e.mu
func (*Endpoint) Bind ¶
func (e *Endpoint) Bind(addr tcpip.FullAddress) (err tcpip.Error)
Bind binds the endpoint to a specific local port and optionally address.
func (*Endpoint) Close ¶
func (e *Endpoint) Close()
Close puts the endpoint in a closed state and frees all resources associated with it. It must be called only once and with no other concurrent calls to the endpoint.
func (*Endpoint) Connect ¶
func (e *Endpoint) Connect(addr tcpip.FullAddress) tcpip.Error
Connect connects the endpoint to its peer.
func (*Endpoint) ConnectEndpoint ¶
ConnectEndpoint is not supported.
func (*Endpoint) Disconnect ¶
Disconnect implements tcpip.Endpoint.Disconnect.
func (*Endpoint) EndpointState ¶
func (e *Endpoint) EndpointState() EndpointState
EndpointState returns the current state of the endpoint.
func (*Endpoint) GetAcceptConn ¶
GetAcceptConn implements tcpip.SocketOptionsHandler.
func (*Endpoint) GetLocalAddress ¶
func (e *Endpoint) GetLocalAddress() (tcpip.FullAddress, tcpip.Error)
GetLocalAddress returns the address to which the endpoint is bound.
func (*Endpoint) GetRemoteAddress ¶
func (e *Endpoint) GetRemoteAddress() (tcpip.FullAddress, tcpip.Error)
GetRemoteAddress returns the address to which the endpoint is connected.
func (*Endpoint) GetSockOpt ¶
func (e *Endpoint) GetSockOpt(opt tcpip.GettableSocketOption) tcpip.Error
GetSockOpt implements tcpip.Endpoint.GetSockOpt.
func (*Endpoint) GetSockOptInt ¶
GetSockOptInt implements tcpip.Endpoint.GetSockOptInt.
func (*Endpoint) HandleError ¶
func (e *Endpoint) HandleError(transErr stack.TransportError, pkt *stack.PacketBuffer)
HandleError implements stack.TransportEndpoint.
func (*Endpoint) HandlePacket ¶
func (*Endpoint) HandlePacket(stack.TransportEndpointID, *stack.PacketBuffer)
HandlePacket implements stack.TransportEndpoint.HandlePacket.
func (*Endpoint) Info ¶
func (e *Endpoint) Info() tcpip.EndpointInfo
Info returns a copy of the endpoint info.
func (*Endpoint) LastErrorLocked ¶
LastErrorLocked reads and clears lastError. Only to be used in tests. +checklocks:e.mu
func (*Endpoint) Listen ¶
Listen puts the endpoint in "listen" mode, which allows it to accept new connections.
func (*Endpoint) LockUser ¶
func (e *Endpoint) LockUser()
LockUser tries to lock e.mu and if it fails it will check if the lock is held by another syscall goroutine. If yes, then it will go to sleep waiting for the lock to be released; if not, then it will spin till it acquires the lock or another syscall goroutine acquires it, in which case it will go to sleep as described above.
The assumption behind spinning here is that background packet processing should not be holding the lock for long, and spinning reduces latency as we avoid an expensive sleep/wakeup of the syscall goroutine. +checklocksacquire:e.mu +checklocksacquire:e.snd.ep.mu
func (*Endpoint) ModerateRecvBuf ¶
ModerateRecvBuf adjusts the receive buffer and the advertised window based on the number of bytes copied to userspace.
func (*Endpoint) OnCorkOptionSet ¶
OnCorkOptionSet implements tcpip.SocketOptionsHandler.OnCorkOptionSet.
func (*Endpoint) OnDelayOptionSet ¶
OnDelayOptionSet implements tcpip.SocketOptionsHandler.OnDelayOptionSet.
func (*Endpoint) OnKeepAliveSet ¶
OnKeepAliveSet implements tcpip.SocketOptionsHandler.OnKeepAliveSet.
func (*Endpoint) OnReuseAddressSet ¶
OnReuseAddressSet implements tcpip.SocketOptionsHandler.OnReuseAddressSet.
func (*Endpoint) OnReusePortSet ¶
OnReusePortSet implements tcpip.SocketOptionsHandler.OnReusePortSet.
func (*Endpoint) OnSetReceiveBufferSize ¶
OnSetReceiveBufferSize implements tcpip.SocketOptionsHandler.OnSetReceiveBufferSize.
func (*Endpoint) OnSetSendBufferSize ¶
OnSetSendBufferSize implements tcpip.SocketOptionsHandler.OnSetSendBufferSize.
func (*Endpoint) Read ¶
func (e *Endpoint) Read(dst io.Writer, opts tcpip.ReadOptions) (tcpip.ReadResult, tcpip.Error)
Read implements tcpip.Endpoint.Read.
func (*Endpoint) Readiness ¶
Readiness returns the current readiness of the endpoint. For example, if waiter.EventIn is set, the endpoint is immediately readable.
func (*Endpoint) Resume ¶
func (e *Endpoint) Resume()
Resume implements tcpip.ResumableEndpoint.Resume.
func (*Endpoint) ResumeWork ¶
func (e *Endpoint) ResumeWork()
ResumeWork resumes packet processing. Only to be used in tests. +checklocksrelease:e.mu
func (*Endpoint) SetOwner ¶
func (e *Endpoint) SetOwner(owner tcpip.PacketOwner)
SetOwner implements tcpip.Endpoint.SetOwner.
func (*Endpoint) SetSockOpt ¶
func (e *Endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error
SetSockOpt sets a socket option.
func (*Endpoint) SetSockOptInt ¶
SetSockOptInt sets a socket option.
func (*Endpoint) Shutdown ¶
func (e *Endpoint) Shutdown(flags tcpip.ShutdownFlags) tcpip.Error
Shutdown closes the read and/or write end of the endpoint connection to its peer.
func (*Endpoint) SocketOptions ¶
func (e *Endpoint) SocketOptions() *tcpip.SocketOptions
SocketOptions implements tcpip.Endpoint.SocketOptions.
func (*Endpoint) State ¶
State implements tcpip.Endpoint.State. It exports the endpoint's protocol state for diagnostics.
func (*Endpoint) Stats ¶
func (e *Endpoint) Stats() tcpip.EndpointStats
Stats returns a pointer to the endpoint stats.
func (*Endpoint) StopWork ¶
func (e *Endpoint) StopWork()
StopWork halts packet processing. Only to be used in tests. +checklocksacquire:e.mu
func (*Endpoint) TryLock ¶
TryLock is a helper that calls TryLock on the endpoint's mutex and adds the necessary checklocks annotations. TODO(b/226403629): Remove this once checklocks understands TryLock. +checklocksacquire:e.mu
func (*Endpoint) UnlockUser ¶
func (e *Endpoint) UnlockUser()
UnlockUser will check if there are any segments already queued for processing and wake up a processor goroutine to process them before unlocking e.mu. This is required because when packets arrive and the endpoint lock is already held, such packets are queued up to be processed.
Precondition: e.LockUser() must have been called before calling e.UnlockUser() +checklocksrelease:e.mu
func (*Endpoint) UpdateLastError ¶
UpdateLastError implements tcpip.SocketOptionsHandler.UpdateLastError.
func (*Endpoint) WakeupWriters ¶
func (e *Endpoint) WakeupWriters()
WakeupWriters implements tcpip.SocketOptionsHandler.WakeupWriters.
type EndpointState ¶
type EndpointState tcpip.EndpointState
EndpointState represents the state of a TCP endpoint.
const ( // TCP protocol states in sync with the definitions in // https://github.com/torvalds/linux/blob/7acac4b3196/include/net/tcp_states.h#L13 StateEstablished EndpointState StateSynSent StateSynRecv StateFinWait1 StateFinWait2 StateTimeWait StateClose StateCloseWait StateLastAck StateListen StateClosing // Endpoint states internal to netstack. StateInitial StateBound StateConnecting // Connect() called, but the initial SYN hasn't been sent. StateError )
Endpoint states. Note that they are represented in a netstack-specific manner and may not be meaningful externally. Specifically, they need to be translated to Linux's representation for these states if presented to userspace.
func (EndpointState) String ¶
func (s EndpointState) String() string
String implements fmt.Stringer.String.
type Forwarder ¶
type Forwarder struct {
// contains filtered or unexported fields
}
Forwarder is a connection request forwarder, which allows clients to decide what to do with a connection request, for example: ignore it, send a RST, or attempt to complete the 3-way handshake.
The canonical way of using it is to pass the Forwarder.HandlePacket function to stack.SetTransportProtocolHandler.
func NewForwarder ¶
func NewForwarder(s *stack.Stack, rcvWnd, maxInFlight int, handler func(*ForwarderRequest)) *Forwarder
NewForwarder allocates and initializes a new forwarder with the given maximum number of in-flight connection attempts. Once the maximum is reached new incoming connection requests will be ignored.
If rcvWnd is set to zero, the default buffer size is used instead.
func (*Forwarder) HandlePacket ¶
func (f *Forwarder) HandlePacket(id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool
HandlePacket handles a packet if it is of interest to the forwarder (i.e., if it's a SYN packet), returning true if it's the case. Otherwise the packet is not handled and false is returned.
This function is expected to be passed as an argument to the stack.SetTransportProtocolHandler function.
type ForwarderRequest ¶
type ForwarderRequest struct {
// contains filtered or unexported fields
}
ForwarderRequest represents a connection request received by the forwarder and passed to the client. Clients must eventually call Complete() on it, and may optionally create an endpoint to represent it via CreateEndpoint.
func (*ForwarderRequest) Complete ¶
func (r *ForwarderRequest) Complete(sendReset bool)
Complete completes the request, and optionally sends a RST segment back to the sender.
func (*ForwarderRequest) CreateEndpoint ¶
CreateEndpoint creates a TCP endpoint for the connection request, performing the 3-way handshake in the process.
func (*ForwarderRequest) ForwardedPacketExperimentOption ¶
func (r *ForwarderRequest) ForwardedPacketExperimentOption() (uint16, bool)
ForwardedPacketExperimentOption returns the experiment option value from the forwarded packet and a bool indicating whether an experiment option value was found.
func (*ForwarderRequest) ID ¶
func (r *ForwarderRequest) ID() stack.TransportEndpointID
ID returns the 4-tuple (src address, src port, dst address, dst port) that represents the connection request.
type RcvBufAutoTuneParams ¶
type RcvBufAutoTuneParams struct { // MeasureTime is the time at which the current measurement was // started. MeasureTime tcpip.MonotonicTime // CopiedBytes is the number of bytes copied to user space since this // measure began. CopiedBytes int // PrevCopiedBytes is the number of bytes copied to userspace in the // previous RTT period. PrevCopiedBytes int // RcvBufSize is the auto tuned receive buffer size. RcvBufSize int // RTT is the smoothed RTT as measured by observing the time between // when a byte is first acknowledged and the receipt of data that is at // least one window beyond the sequence number that was acknowledged. RTT time.Duration // RTTVar is the "round-trip time variation" as defined in section 2 of // RFC6298. RTTVar time.Duration // RTTMeasureSeqNumber is the highest acceptable sequence number at the // time this RTT measurement period began. RTTMeasureSeqNumber seqnum.Value // RTTMeasureTime is the absolute time at which the current RTT // measurement period began. RTTMeasureTime tcpip.MonotonicTime // Disabled is true if an explicit receive buffer is set for the // endpoint. Disabled bool }
RcvBufAutoTuneParams holds state related to TCP receive buffer auto-tuning.
+stateify savable
type ReceiveErrors ¶
type ReceiveErrors struct { tcpip.ReceiveErrors // SegmentQueueDropped is the number of segments dropped due to // a full segment queue. SegmentQueueDropped tcpip.StatCounter // ChecksumErrors is the number of segments dropped due to bad checksums. ChecksumErrors tcpip.StatCounter // ListenOverflowSynDrop is the number of times the listen queue overflowed // and a SYN was dropped. ListenOverflowSynDrop tcpip.StatCounter // ListenOverflowAckDrop is the number of times the final ACK // in the handshake was dropped due to overflow. ListenOverflowAckDrop tcpip.StatCounter // ZeroRcvWindowState is the number of times we advertised // a zero receive window when rcvQueue is full. ZeroRcvWindowState tcpip.StatCounter // WantZeroRcvWindow is the number of times we wanted to advertise a // zero receive window but couldn't because it would have caused // the receive window's right edge to shrink. WantZeroRcvWindow tcpip.StatCounter }
ReceiveErrors collect segment receive errors within transport layer.
+stateify savable
type SACKInfo ¶
type SACKInfo struct { // Blocks holds the SACK blocks we track per endpoint, up to // MaxSACKBlocks of them. Blocks [MaxSACKBlocks]header.SACKBlock // NumBlocks is the number of valid SACK blocks stored in the // Blocks array above. NumBlocks int }
SACKInfo holds TCP SACK related information for a given endpoint.
+stateify savable
type SACKScoreboard ¶
type SACKScoreboard struct {
// contains filtered or unexported fields
}
SACKScoreboard stores a set of disjoint SACK ranges.
+stateify savable
func NewSACKScoreboard ¶
func NewSACKScoreboard(smss uint16, iss seqnum.Value) *SACKScoreboard
NewSACKScoreboard returns a new SACK Scoreboard.
func (*SACKScoreboard) Copy ¶
func (s *SACKScoreboard) Copy() (sackBlocks []header.SACKBlock, maxSACKED seqnum.Value)
Copy provides a copy of the SACK scoreboard.
func (*SACKScoreboard) Delete ¶
func (s *SACKScoreboard) Delete(seq seqnum.Value)
Delete removes all SACK information prior to seq.
func (*SACKScoreboard) Empty ¶
func (s *SACKScoreboard) Empty() bool
Empty returns true if the SACK scoreboard has no entries, false otherwise.
func (*SACKScoreboard) Insert ¶
func (s *SACKScoreboard) Insert(r header.SACKBlock)
Insert inserts/merges the provided SACKBlock into the scoreboard.
func (*SACKScoreboard) IsLost ¶
func (s *SACKScoreboard) IsLost(seq seqnum.Value) bool
IsLost implements the IsLost(SeqNum) operation defined in RFC3517 section 4.
This routine returns whether the given sequence number is considered to be lost. The routine returns true when either nDupAckThreshold discontiguous SACKed sequences have arrived above 'SeqNum' or (nDupAckThreshold * SMSS) bytes with sequence numbers greater than 'SeqNum' have been SACKed. Otherwise, the routine returns false.
func (*SACKScoreboard) IsRangeLost ¶
func (s *SACKScoreboard) IsRangeLost(r header.SACKBlock) bool
IsRangeLost implements the IsLost(SeqNum) operation defined in RFC 6675 section 4 but operates on a range of sequence numbers and returns true if there are at least nDupAckThreshold SACK blocks greater than the range being checked or if at least (nDupAckThreshold-1)*s.smss bytes have been SACKED with sequence numbers greater than the block being checked.
func (*SACKScoreboard) IsSACKED ¶
func (s *SACKScoreboard) IsSACKED(r header.SACKBlock) bool
IsSACKED returns true if the given range of sequence numbers denoted by r is already covered by SACK information in the scoreboard.
func (*SACKScoreboard) MaxSACKED ¶
func (s *SACKScoreboard) MaxSACKED() seqnum.Value
MaxSACKED returns the highest sequence number ever inserted in the SACK scoreboard.
func (*SACKScoreboard) Reset ¶
func (s *SACKScoreboard) Reset()
Reset erases all known range information from the SACK scoreboard.
func (*SACKScoreboard) SMSS ¶
func (s *SACKScoreboard) SMSS() uint16
SMSS returns the sender's MSS as held by the SACK scoreboard.
func (*SACKScoreboard) Sacked ¶
func (s *SACKScoreboard) Sacked() seqnum.Size
Sacked returns the current number of bytes held in the SACK scoreboard.
func (*SACKScoreboard) String ¶
func (s *SACKScoreboard) String() string
String returns human-readable state of the scoreboard structure.
type SendErrors ¶
type SendErrors struct { tcpip.SendErrors // SegmentSendToNetworkFailed is the number of TCP segments failed to be sent // to the network endpoint. SegmentSendToNetworkFailed tcpip.StatCounter // SynSendToNetworkFailed is the number of TCP SYNs failed to be sent // to the network endpoint. SynSendToNetworkFailed tcpip.StatCounter // Retransmits is the number of TCP segments retransmitted. Retransmits tcpip.StatCounter // FastRetransmit is the number of segments retransmitted in fast // recovery. FastRetransmit tcpip.StatCounter // Timeouts is the number of times the RTO expired. Timeouts tcpip.StatCounter }
SendErrors collect segment send errors within the transport layer.
+stateify savable
type Stats ¶
type Stats struct { // SegmentsReceived is the number of TCP segments received that // the transport layer successfully parsed. SegmentsReceived tcpip.StatCounter // SegmentsSent is the number of TCP segments sent. SegmentsSent tcpip.StatCounter // FailedConnectionAttempts is the number of times we saw Connect and // Accept errors. FailedConnectionAttempts tcpip.StatCounter // ReceiveErrors collects segment receive errors within the // transport layer. ReceiveErrors ReceiveErrors // ReadErrors collects segment read errors from an endpoint read call. ReadErrors tcpip.ReadErrors // SendErrors collects segment send errors within the transport layer. SendErrors SendErrors // WriteErrors collects segment write errors from an endpoint write call. WriteErrors tcpip.WriteErrors }
Stats holds statistics about the endpoint.
+stateify savable
func (*Stats) IsEndpointStats ¶
func (*Stats) IsEndpointStats()
IsEndpointStats is an empty method to implement the tcpip.EndpointStats marker interface.
type TCPCubicState ¶
type TCPCubicState struct { // WLastMax is the previous wMax value. WLastMax float64 // WMax is the value of the congestion window at the time of the last // congestion event. WMax float64 // T is the time when the current congestion avoidance was entered. T tcpip.MonotonicTime // TimeSinceLastCongestion denotes the time since the current // congestion avoidance was entered. TimeSinceLastCongestion time.Duration // C is the cubic constant as specified in RFC8312, page 11. C float64 // K is the time period (in seconds) that the above function takes to // increase the current window size to WMax if there are no further // congestion events and is calculated using the following equation: // // K = cubic_root(WMax*(1-beta_cubic)/C) (Eq. 2, page 5) K float64 // Beta is the CUBIC multiplication decrease factor. That is, when a // congestion event is detected, CUBIC reduces its cwnd to // WC(0)=WMax*beta_cubic. Beta float64 // WC is window computed by CUBIC at time TimeSinceLastCongestion. It's // calculated using the formula: // // WC(TimeSinceLastCongestion) = C*(t-K)^3 + WMax (Eq. 1) WC float64 // WEst is the window computed by CUBIC at time // TimeSinceLastCongestion+RTT i.e WC(TimeSinceLastCongestion+RTT). WEst float64 // EndSeq is the sequence number that, when cumulatively ACK'd, ends the // HyStart round. EndSeq seqnum.Value // CurrRTT is the minimum round-trip time from the current round. CurrRTT time.Duration // LastRTT is the minimum round-trip time from the previous round. LastRTT time.Duration // SampleCount is the number of samples from the current round. SampleCount uint // LastAck is the time we received the most recent ACK (or start of round if // more recent). LastAck tcpip.MonotonicTime // RoundStart is the time we started the most recent HyStart round. RoundStart tcpip.MonotonicTime }
TCPCubicState is used to hold a copy of the internal cubic state when the TCPProbeFunc is invoked.
+stateify savable
type TCPEndpointID ¶
type TCPEndpointID struct { // LocalPort is the local port associated with the endpoint. LocalPort uint16 // LocalAddress is the local [network layer] address associated with // the endpoint. LocalAddress tcpip.Address // RemotePort is the remote port associated with the endpoint. RemotePort uint16 // RemoteAddress is the remote [network layer] address associated with // the endpoint. RemoteAddress tcpip.Address }
TCPEndpointID is the unique 4 tuple that identifies a given endpoint.
+stateify savable
type TCPEndpointState ¶
type TCPEndpointState struct { // TCPEndpointStateInner contains the members of TCPEndpointState used // by the endpoint's internal implementation. TCPEndpointStateInner // ID is a copy of the TransportEndpointID for the endpoint. ID TCPEndpointID // SegTime denotes the absolute time when this segment was received. SegTime tcpip.MonotonicTime // RcvBufState contains information about the state of the endpoint's // receive socket buffer. RcvBufState TCPRcvBufState // SndBufState contains information about the state of the endpoint's // send socket buffer. SndBufState TCPSndBufState // SACK holds TCP SACK related information for this endpoint. SACK TCPSACKInfo // Receiver holds variables related to the TCP receiver for the // endpoint. Receiver TCPReceiverState // Sender holds state related to the TCP Sender for the endpoint. Sender TCPSenderState }
TCPEndpointState is a copy of the internal state of a TCP endpoint.
+stateify savable
type TCPEndpointStateInner ¶
type TCPEndpointStateInner struct { // TSOffset is a randomized offset added to the value of the TSVal // field in the timestamp option. TSOffset tcp.TSOffset // SACKPermitted is set to true if the peer sends the TCPSACKPermitted // option in the SYN/SYN-ACK. SACKPermitted bool // SendTSOk is used to indicate when the TS Option has been negotiated. // When sendTSOk is true every non-RST segment should carry a TS as per // RFC7323#section-1.1. SendTSOk bool // RecentTS is the timestamp that should be sent in the TSEcr field of // the timestamp for future segments sent by the endpoint. This field // is updated if required when a new segment is received by this // endpoint. RecentTS uint32 }
TCPEndpointStateInner contains the members of TCPEndpointState used directly (that is, not within another containing struct) within the endpoint's internal implementation.
+stateify savable
type TCPFastRecoveryState ¶
type TCPFastRecoveryState struct { // Active if true indicates the endpoint is in fast recovery. The // following fields are only meaningful when Active is true. Active bool // First is the first unacknowledged sequence number being recovered. First seqnum.Value // Last is the 'recover' sequence number that indicates the point at // which we should exit recovery barring any timeouts etc. Last seqnum.Value // MaxCwnd is the maximum value we are permitted to grow the congestion // window during recovery. This is set at the time we enter recovery. // It exists to avoid attacks where the receiver intentionally sends // duplicate acks to artificially inflate the sender's cwnd. MaxCwnd int // HighRxt is the highest sequence number which has been retransmitted // during the current loss recovery phase. See: RFC 6675 Section 2 for // details. HighRxt seqnum.Value // RescueRxt is the highest sequence number which has been // optimistically retransmitted to prevent stalling of the ACK clock // when there is loss at the end of the window and no new data is // available for transmission. See: RFC 6675 Section 2 for details. RescueRxt seqnum.Value }
TCPFastRecoveryState holds a copy of the internal fast recovery state of a TCP endpoint.
+stateify savable
type TCPProbeFunc ¶
// TCPProbeFunc is the function type expected of a TCP probe; the probe
// receives a pointer to a TCPEndpointState.
type TCPProbeFunc func(s *TCPEndpointState)
TCPProbeFunc is the expected function type for a TCP probe function to be passed to stack.AddTCPProbe.
type TCPRACKState ¶
type TCPRACKState struct { // XmitTime is the transmission timestamp of the most recent // acknowledged segment. XmitTime tcpip.MonotonicTime // EndSequence is the ending TCP sequence number of the most recent // acknowledged segment. EndSequence seqnum.Value // FACK is the highest selectively or cumulatively acknowledged // sequence. FACK seqnum.Value // RTT is the round trip time of the most recently delivered packet on // the connection (either cumulatively acknowledged or selectively // acknowledged) that was not marked invalid as a possible spurious // retransmission. RTT time.Duration // Reord is true iff reordering has been detected on this connection. Reord bool // DSACKSeen is true iff the connection has seen a DSACK. DSACKSeen bool // ReoWnd is the reordering window time used for recording packet // transmission times. It is used to defer the moment at which RACK // marks a packet lost. ReoWnd time.Duration // ReoWndIncr is the multiplier applied to adjust reorder window. ReoWndIncr uint8 // ReoWndPersist is the number of loss recoveries before resetting // reorder window. ReoWndPersist int8 // RTTSeq is the SND.NXT when RTT is updated. RTTSeq seqnum.Value }
TCPRACKState is used to hold a copy of the internal RACK state when the TCPProbeFunc is invoked.
+stateify savable
type TCPRTTState ¶
// TCPRTTState is a copy of the endpoint's round trip time information.
type TCPRTTState struct {
	// SRTT is the smoothed round trip time, as defined in section 2 of
	// RFC 6298.
	SRTT time.Duration

	// RTTVar is the round-trip time variation, as defined in section 2 of
	// RFC 6298.
	RTTVar time.Duration

	// SRTTInited is true once a valid RTT measurement has been completed.
	SRTTInited bool
}
TCPRTTState holds a copy of information about the endpoint's round trip time.
+stateify savable
type TCPRcvBufState ¶
type TCPRcvBufState struct { // RcvBufUsed is the amount of bytes actually held in the receive // socket buffer for the endpoint. RcvBufUsed int // RcvBufAutoTuneParams is used to hold state variables to compute the // auto tuned receive buffer size. RcvAutoParams RcvBufAutoTuneParams // RcvClosed if true, indicates the endpoint has been closed for // reading. RcvClosed bool }
TCPRcvBufState contains information about the state of an endpoint's receive socket buffer.
+stateify savable
type TCPReceiverState ¶
type TCPReceiverState struct { // RcvNxt is the TCP variable RCV.NXT. RcvNxt seqnum.Value // RcvAcc is one beyond the last acceptable sequence number. That is, // the "largest" sequence value that the receiver has announced to its // peer that it's willing to accept. This may be different than RcvNxt // + (last advertised receive window) if the receive window is reduced; // in that case we have to reduce the window as we receive more data // instead of shrinking it. RcvAcc seqnum.Value // RcvWndScale is the window scaling to use for inbound segments. RcvWndScale uint8 // PendingBufUsed is the number of bytes pending in the receive queue. PendingBufUsed int }
TCPReceiverState holds a copy of the internal state of the receiver for a given TCP endpoint.
+stateify savable
type TCPSACKInfo ¶
type TCPSACKInfo struct { // Blocks is the list of SACK Blocks that identify the out of order // segments held by a given TCP endpoint. Blocks []header.SACKBlock // ReceivedBlocks are the SACK blocks received by this endpoint from // the peer endpoint. ReceivedBlocks []header.SACKBlock // MaxSACKED is the highest sequence number that has been SACKED by the // peer. MaxSACKED seqnum.Value }
TCPSACKInfo holds TCP SACK related information for a given TCP endpoint.
+stateify savable
type TCPSenderState ¶
type TCPSenderState struct { // LastSendTime is the timestamp at which we sent the last segment. LastSendTime tcpip.MonotonicTime // DupAckCount is the number of Duplicate ACKs received. It is used for // fast retransmit. DupAckCount int // SndCwnd is the size of the sending congestion window in packets. SndCwnd int // Ssthresh is the threshold between slow start and congestion // avoidance. Ssthresh int // SndCAAckCount is the number of packets acknowledged during // congestion avoidance. When enough packets have been ack'd (typically // cwnd packets), the congestion window is incremented by one. SndCAAckCount int // Outstanding is the number of packets that have been sent but not yet // acknowledged. Outstanding int // SackedOut is the number of packets which have been selectively // acked. SackedOut int // SndWnd is the send window size in bytes. SndWnd seqnum.Size // SndUna is the next unacknowledged sequence number. SndUna seqnum.Value // SndNxt is the sequence number of the next segment to be sent. SndNxt seqnum.Value // RTTMeasureSeqNum is the sequence number being used for the latest // RTT measurement. RTTMeasureSeqNum seqnum.Value // RTTMeasureTime is the time when the RTTMeasureSeqNum was sent. RTTMeasureTime tcpip.MonotonicTime // Closed indicates that the caller has closed the endpoint for // sending. Closed bool // RTO is the retransmit timeout as defined in section of 2 of RFC // 6298. RTO time.Duration // RTTState holds information about the endpoint's round trip time. RTTState TCPRTTState // MaxPayloadSize is the maximum size of the payload of a given // segment. It is initialized on demand. MaxPayloadSize int // SndWndScale is the number of bits to shift left when reading the // send window size from a segment. SndWndScale uint8 // MaxSentAck is the highest acknowledgement number sent till now. MaxSentAck seqnum.Value // FastRecovery holds the fast recovery state for the endpoint. 
FastRecovery TCPFastRecoveryState // Cubic holds the state related to CUBIC congestion control. Cubic TCPCubicState // RACKState holds the state related to RACK loss detection algorithm. RACKState TCPRACKState // RetransmitTS records the timestamp used to detect spurious recovery. RetransmitTS uint32 // SpuriousRecovery indicates if the sender entered recovery spuriously. SpuriousRecovery bool }
TCPSenderState holds a copy of the internal state of the sender for a given TCP Endpoint.
+stateify savable
type TCPSndBufState ¶
type TCPSndBufState struct { // SndBufSize is the size of the socket send buffer. SndBufSize int // SndBufUsed is the number of bytes held in the socket send buffer. SndBufUsed int // SndClosed indicates that the endpoint has been closed for sends. SndClosed bool // PacketTooBigCount is used to notify the main protocol routine how // many times a "packet too big" control packet is received. PacketTooBigCount int // SndMTU is the smallest MTU seen in the control packets received. SndMTU int // AutoTuneSndBufDisabled indicates that the auto tuning of send buffer // is disabled. AutoTuneSndBufDisabled atomicbitops.Uint32 }
TCPSndBufState contains information about the state of an endpoint's send socket buffer.
+stateify savable
Source Files ¶
- accept.go
- connect.go
- connect_unsafe.go
- cubic.go
- dispatcher.go
- endpoint.go
- endpoint_state.go
- forwarder.go
- protocol.go
- rack.go
- rcv.go
- reno.go
- reno_recovery.go
- sack.go
- sack_recovery.go
- sack_scoreboard.go
- segment.go
- segment_heap.go
- segment_queue.go
- segment_state.go
- segment_unsafe.go
- snd.go
- state.go
- timer.go
Directories ¶
| Path | Synopsis |
|---|---|
| test | |
| test/e2e | Package e2e contains definitions common to all e2e tcp tests. |
| testing | |
| testing/context | Package context provides a test context for use in tcp tests. |