overlay

package
v1.13.0-rc Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 16, 2020 License: AGPL-3.0 Imports: 14 Imported by: 2

Documentation

Index

Constants

This section is empty.

Variables

View Source
// ErrEmptyNode is returned when the nodeID is empty.
var ErrEmptyNode = errs.New("empty node ID")

ErrEmptyNode is returned when the nodeID is empty.

View Source
// ErrNodeDisqualified is returned if a node is disqualified.
var ErrNodeDisqualified = errs.Class("node is disqualified")

ErrNodeDisqualified is returned if a node is disqualified.

View Source
// ErrNodeFinishedGE is returned if a node has finished graceful exit.
var ErrNodeFinishedGE = errs.Class("node finished graceful exit")

ErrNodeFinishedGE is returned if a node has finished graceful exit.

View Source
// ErrNodeNotFound is returned if a node does not exist in the database.
var ErrNodeNotFound = errs.Class("node not found")

ErrNodeNotFound is returned if a node does not exist in database.

View Source
// ErrNodeOffline is returned if a node is offline.
var ErrNodeOffline = errs.Class("node is offline")

ErrNodeOffline is returned if a node is offline.

View Source
// ErrNotEnoughNodes is returned when selecting nodes failed with the given parameters.
var ErrNotEnoughNodes = errs.Class("not enough nodes")

ErrNotEnoughNodes is returned when selecting nodes failed with the given parameters.

View Source
var (

	// Error represents an overlay error; it is the errs.Class named "overlay error".
	Error = errs.Class("overlay error")
)

Functions

func ResolveIPAndNetwork added in v0.35.2

func ResolveIPAndNetwork(ctx context.Context, target string) (ipPort, network string, err error)

ResolveIPAndNetwork resolves the target address and determines its IP and /24 subnet IPv4 or /64 subnet IPv6.

Types

type AuditHistoryConfig added in v1.10.1

// AuditHistoryConfig is a configuration struct defining time periods and
// thresholds for penalizing nodes for being offline. It is used for downtime
// suspension and disqualification.
//
// NOTE(review): the struct tags of TrackingPeriod, GracePeriod and
// OfflineThreshold are elided in this view, so their help text and defaults
// are not documented here — consult the original source.
type AuditHistoryConfig struct {
	// WindowSize is the length of time spanning a single audit window.
	WindowSize       time.Duration `help:"The length of time spanning a single audit window" releaseDefault:"12h" devDefault:"5m"`
	TrackingPeriod   time.Duration `` /* 127-byte string literal not displayed */
	GracePeriod      time.Duration `` /* 236-byte string literal not displayed */
	OfflineThreshold float64       `` /* 226-byte string literal not displayed */
}

AuditHistoryConfig is a configuration struct defining time periods and thresholds for penalizing nodes for being offline. It is used for downtime suspension and disqualification.

type AuditType added in v1.1.1

// AuditType is an enum representing the outcome of a particular audit reported to the overlay.
type AuditType int

AuditType is an enum representing the outcome of a particular audit reported to the overlay.

// Possible AuditType values. They are assigned sequentially via iota,
// starting at 0 for AuditSuccess.
const (
	// AuditSuccess represents a successful audit.
	AuditSuccess AuditType = iota
	// AuditFailure represents a failed audit.
	AuditFailure
	// AuditUnknown represents an audit that resulted in an unknown error from the node.
	AuditUnknown
)

type CacheConfig added in v1.3.2

// CacheConfig is a configuration for overlay node selection cache.
type CacheConfig struct {
	// Disabled turns the node cache off when true.
	Disabled  bool          `help:"disable node cache" default:"false"`
	// Staleness is how stale the node selection cache can be.
	Staleness time.Duration `help:"how stale the node selection cache can be" releaseDefault:"3m" devDefault:"5m"`
}

CacheConfig is a configuration for overlay node selection cache.

type CacheDB added in v1.3.2

// CacheDB implements the database for overlay node selection cache.
//
// architecture: Database
type CacheDB interface {
	// SelectAllStorageNodesUpload returns all nodes that qualify to store data, organized as reputable nodes and new nodes.
	SelectAllStorageNodesUpload(ctx context.Context, selectionCfg NodeSelectionConfig) (reputable, new []*SelectedNode, err error)
}

CacheDB implements the database for overlay node selection cache

architecture: Database

type Config

// Config is a configuration for overlay service.
type Config struct {
	// Node holds the node selection settings.
	Node                 NodeSelectionConfig
	// NodeSelectionCache holds the node selection cache settings.
	NodeSelectionCache   CacheConfig
	// UpdateStatsBatchSize is the number of update requests to process per transaction.
	UpdateStatsBatchSize int `help:"number of update requests to process per transaction" default:"100"`
	// AuditHistory holds the audit history (offline penalization) settings.
	AuditHistory         AuditHistoryConfig
}

Config is a configuration for overlay service.

type DB

// DB implements the database for overlay.Service.
//
// architecture: Database
type DB interface {
	// GetOnlineNodesForGetDelete returns a map of nodes for the supplied nodeIDs.
	GetOnlineNodesForGetDelete(ctx context.Context, nodeIDs []storj.NodeID, onlineWindow time.Duration) (map[storj.NodeID]*SelectedNode, error)
	// SelectStorageNodes looks up nodes based on criteria.
	SelectStorageNodes(ctx context.Context, totalNeededNodes, newNodeCount int, criteria *NodeCriteria) ([]*SelectedNode, error)
	// SelectAllStorageNodesUpload returns all nodes that qualify to store data, organized as reputable nodes and new nodes.
	SelectAllStorageNodesUpload(ctx context.Context, selectionCfg NodeSelectionConfig) (reputable, new []*SelectedNode, err error)

	// Get looks up the node by nodeID.
	Get(ctx context.Context, nodeID storj.NodeID) (*NodeDossier, error)
	// KnownOffline filters a set of nodes to offline nodes.
	KnownOffline(context.Context, *NodeCriteria, storj.NodeIDList) (storj.NodeIDList, error)
	// KnownUnreliableOrOffline filters a set of nodes to unhealthy or offline nodes, independent of new.
	KnownUnreliableOrOffline(context.Context, *NodeCriteria, storj.NodeIDList) (storj.NodeIDList, error)
	// KnownReliable filters a set of nodes to reliable (online and qualified) nodes.
	KnownReliable(ctx context.Context, onlineWindow time.Duration, nodeIDs storj.NodeIDList) ([]*pb.Node, error)
	// Reliable returns all nodes that are reliable.
	Reliable(context.Context, *NodeCriteria) (storj.NodeIDList, error)
	// BatchUpdateStats updates multiple storagenodes' stats in one transaction.
	BatchUpdateStats(ctx context.Context, updateRequests []*UpdateRequest, batchSize int, now time.Time) (failed storj.NodeIDList, err error)
	// UpdateStats updates all parts of a single storagenode's stats.
	UpdateStats(ctx context.Context, request *UpdateRequest, now time.Time) (stats *NodeStats, err error)
	// UpdateNodeInfo updates node dossier with info requested from the node itself like node type, email, wallet, capacity, and version.
	UpdateNodeInfo(ctx context.Context, node storj.NodeID, nodeInfo *InfoResponse) (stats *NodeDossier, err error)
	// UpdateUptime updates a single storagenode's uptime stats.
	UpdateUptime(ctx context.Context, nodeID storj.NodeID, isUp bool) (stats *NodeStats, err error)
	// UpdateCheckIn updates a single storagenode's check-in stats.
	UpdateCheckIn(ctx context.Context, node NodeCheckInInfo, timestamp time.Time, config NodeSelectionConfig) (err error)

	// UpdateAuditHistory updates a node's audit history with an online or offline audit and returns the online score for the tracking period.
	UpdateAuditHistory(ctx context.Context, nodeID storj.NodeID, auditTime time.Time, online bool, config AuditHistoryConfig) (onlineScore float64, err error)

	// AllPieceCounts returns a map of node IDs to piece counts from the db.
	AllPieceCounts(ctx context.Context) (pieceCounts map[storj.NodeID]int, err error)
	// UpdatePieceCounts sets the piece count field for the given node IDs.
	UpdatePieceCounts(ctx context.Context, pieceCounts map[storj.NodeID]int) (err error)

	// UpdateExitStatus is used to update a node's graceful exit status.
	UpdateExitStatus(ctx context.Context, request *ExitStatusRequest) (_ *NodeDossier, err error)
	// GetExitingNodes returns nodes who have initiated a graceful exit, but have not completed it.
	GetExitingNodes(ctx context.Context) (exitingNodes []*ExitStatus, err error)
	// GetGracefulExitCompletedByTimeFrame returns nodes who have completed graceful exit within a time window (time window is around graceful exit completion).
	GetGracefulExitCompletedByTimeFrame(ctx context.Context, begin, end time.Time) (exitedNodes storj.NodeIDList, err error)
	// GetGracefulExitIncompleteByTimeFrame returns nodes who have initiated, but not completed graceful exit within a time window (time window is around graceful exit initiation).
	GetGracefulExitIncompleteByTimeFrame(ctx context.Context, begin, end time.Time) (exitingNodes storj.NodeIDList, err error)
	// GetExitStatus returns a node's graceful exit status.
	GetExitStatus(ctx context.Context, nodeID storj.NodeID) (exitStatus *ExitStatus, err error)

	// GetNodesNetwork returns the /24 subnet for each storage node, order is not guaranteed.
	GetNodesNetwork(ctx context.Context, nodeIDs []storj.NodeID) (nodeNets []string, err error)

	// GetSuccesfulNodesNotCheckedInSince returns all nodes whose last check-in was successful, but haven't checked-in within a given duration.
	GetSuccesfulNodesNotCheckedInSince(ctx context.Context, duration time.Duration) (nodeAddresses []NodeLastContact, err error)
	// GetOfflineNodesLimited returns a list of the first N offline nodes ordered by least recently contacted.
	GetOfflineNodesLimited(ctx context.Context, limit int) ([]NodeLastContact, error)

	// DisqualifyNode disqualifies a storage node.
	DisqualifyNode(ctx context.Context, nodeID storj.NodeID) (err error)

	// SuspendNodeUnknownAudit suspends a storage node for unknown audits.
	SuspendNodeUnknownAudit(ctx context.Context, nodeID storj.NodeID, suspendedAt time.Time) (err error)
	// UnsuspendNodeUnknownAudit unsuspends a storage node for unknown audits.
	UnsuspendNodeUnknownAudit(ctx context.Context, nodeID storj.NodeID) (err error)

	// TestVetNode directly sets a node's vetted_at timestamp to make testing easier.
	TestVetNode(ctx context.Context, nodeID storj.NodeID) (vettedTime *time.Time, err error)
	// TestUnvetNode directly sets a node's vetted_at timestamp to null to make testing easier.
	TestUnvetNode(ctx context.Context, nodeID storj.NodeID) (err error)
}

DB implements the database for overlay.Service

architecture: Database

type ExitStatus added in v0.24.0

// ExitStatus is used for reading graceful exit status.
//
// NOTE(review): the three timestamps are pointers; presumably nil means the
// corresponding stage has not happened yet — confirm against the DB layer.
type ExitStatus struct {
	NodeID              storj.NodeID
	ExitInitiatedAt     *time.Time
	ExitLoopCompletedAt *time.Time
	ExitFinishedAt      *time.Time
	ExitSuccess         bool
}

ExitStatus is used for reading graceful exit status.

type ExitStatusRequest added in v0.22.0

// ExitStatusRequest is used to update a node's graceful exit status.
//
// Unlike ExitStatus, the timestamps here are plain time.Time values rather
// than pointers.
type ExitStatusRequest struct {
	NodeID              storj.NodeID
	ExitInitiatedAt     time.Time
	ExitLoopCompletedAt time.Time
	ExitFinishedAt      time.Time
	ExitSuccess         bool
}

ExitStatusRequest is used to update a node's graceful exit status.

type FindStorageNodesRequest

// FindStorageNodesRequest defines easy request parameters.
type FindStorageNodesRequest struct {
	// RequestedCount is the number of nodes requested (presumably the number to return — TODO confirm).
	RequestedCount int
	// ExcludedIDs lists node IDs to exclude (presumably from selection — TODO confirm).
	ExcludedIDs    []storj.NodeID
	MinimumVersion string // semver or empty
}

FindStorageNodesRequest defines easy request parameters.

type InfoResponse added in v1.7.1

// InfoResponse contains node dossier info requested from the storage node.
type InfoResponse struct {
	Type     pb.NodeType
	Operator *pb.NodeOperator
	Capacity *pb.NodeCapacity
	Version  *pb.NodeVersion
}

InfoResponse contains node dossier info requested from the storage node.

type Inspector

type Inspector struct {
	// contains filtered or unexported fields
}

Inspector is an RPC service for inspecting overlay internals.

architecture: Endpoint

func NewInspector

func NewInspector(service *Service) *Inspector

NewInspector creates an Inspector.

func (*Inspector) CountNodes

func (srv *Inspector) CountNodes(ctx context.Context, req *pb.CountNodesRequest) (_ *pb.CountNodesResponse, err error)

CountNodes returns the number of nodes in the overlay.

func (*Inspector) DumpNodes

func (srv *Inspector) DumpNodes(ctx context.Context, req *pb.DumpNodesRequest) (_ *pb.DumpNodesResponse, err error)

DumpNodes returns all of the nodes in the overlay.

type NodeCheckInInfo added in v0.21.3

// NodeCheckInInfo contains all the info that will be updated when a node checks in.
type NodeCheckInInfo struct {
	NodeID     storj.NodeID
	Address    *pb.NodeAddress
	LastNet    string
	LastIPPort string
	IsUp       bool
	Operator   *pb.NodeOperator
	Capacity   *pb.NodeCapacity
	Version    *pb.NodeVersion
}

NodeCheckInInfo contains all the info that will be updated when a node checks in.

type NodeCriteria

// NodeCriteria are the requirements for selecting nodes.
type NodeCriteria struct {
	FreeDisk         int64
	// ExcludedIDs lists node IDs to exclude (presumably from selection — TODO confirm).
	ExcludedIDs      []storj.NodeID
	ExcludedNetworks []string // the /24 subnet IPv4 or /64 subnet IPv6 for nodes
	MinimumVersion   string   // semver or empty
	OnlineWindow     time.Duration
	DistinctIP       bool
}

NodeCriteria are the requirements for selecting nodes.

type NodeDossier

// NodeDossier is the complete info that the satellite tracks for a storage node.
//
// pb.Node is embedded, so its fields are promoted onto NodeDossier.
//
// NOTE(review): Disqualified, UnknownAuditSuspended, OfflineSuspended and
// OfflineUnderReview are pointer timestamps; presumably nil means the state
// does not apply — confirm against the DB layer.
type NodeDossier struct {
	pb.Node
	Type                  pb.NodeType
	Operator              pb.NodeOperator
	Capacity              pb.NodeCapacity
	Reputation            NodeStats
	Version               pb.NodeVersion
	Contained             bool
	Disqualified          *time.Time
	UnknownAuditSuspended *time.Time
	OfflineSuspended      *time.Time
	OfflineUnderReview    *time.Time
	PieceCount            int64
	ExitStatus            ExitStatus
	CreatedAt             time.Time
	LastNet               string
	LastIPPort            string
}

NodeDossier is the complete info that the satellite tracks for a storage node.

type NodeLastContact added in v0.29.0

// NodeLastContact contains the ID, address, and timestamp.
type NodeLastContact struct {
	URL                storj.NodeURL
	LastIPPort         string
	LastContactSuccess time.Time
	LastContactFailure time.Time
}

NodeLastContact contains the ID, address, and timestamp.

type NodeSelectionCache added in v1.3.2

type NodeSelectionCache struct {
	// contains filtered or unexported fields
}

NodeSelectionCache keeps a list of all the storage nodes that are qualified to store data. We organize the nodes by whether they are reputable or a new node on the network. The cache will sync with the nodes table in the database and get refreshed once the staleness time has passed.

func NewNodeSelectionCache added in v1.3.2

func NewNodeSelectionCache(log *zap.Logger, db CacheDB, staleness time.Duration, config NodeSelectionConfig) *NodeSelectionCache

NewNodeSelectionCache creates a new cache that keeps a list of all the storage nodes that are qualified to store data.

func (*NodeSelectionCache) GetNodes added in v1.3.2

func (cache *NodeSelectionCache) GetNodes(ctx context.Context, req FindStorageNodesRequest) (_ []*SelectedNode, err error)

GetNodes selects nodes from the cache that will be used to upload a file. Every node selected will be from a distinct network. If the cache hasn't been refreshed recently it will do so first.

func (*NodeSelectionCache) Refresh added in v1.3.2

func (cache *NodeSelectionCache) Refresh(ctx context.Context) (err error)

Refresh populates the cache with all of the reputable nodes and new nodes. This method is useful for tests.

func (*NodeSelectionCache) Size added in v1.3.2

func (cache *NodeSelectionCache) Size() (reputableNodeCount int, newNodeCount int)

Size returns how many reputable nodes and new nodes are in the cache.

type NodeSelectionConfig

// NodeSelectionConfig is a configuration struct to determine the minimum values for nodes to select.
//
// Each field's meaning and default is given by its `help` struct tag.
// NOTE(review): the tags of UptimeCount and SuspensionDQEnabled are elided in
// this view — consult the original source for their help text and defaults.
type NodeSelectionConfig struct {
	UptimeCount      int64         `` /* 127-byte string literal not displayed */
	AuditCount       int64         `help:"the number of times a node has been audited to not be considered a New Node" releaseDefault:"100" devDefault:"0"`
	NewNodeFraction  float64       `help:"the fraction of new nodes allowed per request" releaseDefault:"0.05" devDefault:"1"`
	MinimumVersion   string        `help:"the minimum node software version for node selection queries" default:""`
	OnlineWindow     time.Duration `help:"the amount of time without seeing a node before its considered offline" default:"4h"`
	DistinctIP       bool          `help:"require distinct IPs when choosing nodes for upload" releaseDefault:"true" devDefault:"false"`
	MinimumDiskSpace memory.Size   `help:"how much disk space a node at minimum must have to be selected for upload" default:"500.00MB"`

	// Audit reputation and suspension settings.
	AuditReputationRepairWeight float64       `help:"weight to apply to audit reputation for total repair reputation calculation" default:"1.0"`
	AuditReputationUplinkWeight float64       `help:"weight to apply to audit reputation for total uplink reputation calculation" default:"1.0"`
	AuditReputationLambda       float64       `help:"the forgetting factor used to calculate the audit SNs reputation" default:"0.95"`
	AuditReputationWeight       float64       `help:"the normalization weight used to calculate the audit SNs reputation" default:"1.0"`
	AuditReputationDQ           float64       `help:"the reputation cut-off for disqualifying SNs based on audit history" default:"0.6"`
	SuspensionGracePeriod       time.Duration `help:"the time period that must pass before suspended nodes will be disqualified" releaseDefault:"168h" devDefault:"1h"`
	SuspensionDQEnabled         bool          `` /* 153-byte string literal not displayed */
}

NodeSelectionConfig is a configuration struct to determine the minimum values for nodes to select.

type NodeStats

// NodeStats contains statistics about a node.
//
// NOTE(review): VettedAt, Disqualified, UnknownAuditSuspended,
// OfflineUnderReview and OfflineSuspended are pointer timestamps; presumably
// nil means the state does not apply — confirm against the DB layer.
type NodeStats struct {
	Latency90                   int64
	VettedAt                    *time.Time
	AuditSuccessCount           int64
	AuditCount                  int64
	UptimeSuccessCount          int64
	UptimeCount                 int64
	LastContactSuccess          time.Time
	LastContactFailure          time.Time
	AuditReputationAlpha        float64
	AuditReputationBeta         float64
	Disqualified                *time.Time
	UnknownAuditReputationAlpha float64
	UnknownAuditReputationBeta  float64
	UnknownAuditSuspended       *time.Time
	OfflineUnderReview          *time.Time
	OfflineSuspended            *time.Time
	OnlineScore                 float64
}

NodeStats contains statistics about a node.

type PeerIdentities added in v0.19.0

// PeerIdentities stores storagenode peer identities.
//
// architecture: Database
type PeerIdentities interface {
	// Set adds a peer identity entry for a node.
	Set(context.Context, storj.NodeID, *identity.PeerIdentity) error
	// Get gets the peer identity for a node.
	Get(context.Context, storj.NodeID) (*identity.PeerIdentity, error)
	// BatchGet gets all nodes' peer identities in a transaction.
	BatchGet(context.Context, storj.NodeIDList) ([]*identity.PeerIdentity, error)
}

PeerIdentities stores storagenode peer identities

architecture: Database

type SelectedNode added in v1.1.1

// SelectedNode is used as a result for creating orders limits.
type SelectedNode struct {
	ID         storj.NodeID
	Address    *pb.NodeAddress
	LastNet    string
	LastIPPort string
}

SelectedNode is used as a result for creating orders limits.

func (*SelectedNode) Clone added in v1.4.2

func (node *SelectedNode) Clone() *SelectedNode

Clone returns a deep clone of the selected node.

type Service added in v0.17.0

type Service struct {
	SelectionCache *NodeSelectionCache
	// contains filtered or unexported fields
}

Service is used to store and handle node information

architecture: Service

func NewService added in v0.17.0

func NewService(log *zap.Logger, db DB, config Config) *Service

NewService returns a new Service.

func (*Service) BatchUpdateStats added in v0.17.0

func (service *Service) BatchUpdateStats(ctx context.Context, requests []*UpdateRequest) (failed storj.NodeIDList, err error)

BatchUpdateStats updates multiple storagenodes' stats in one transaction.

func (*Service) Close added in v0.17.0

func (service *Service) Close() error

Close closes resources.

func (*Service) DisqualifyNode added in v0.29.0

func (service *Service) DisqualifyNode(ctx context.Context, nodeID storj.NodeID) (err error)

DisqualifyNode disqualifies a storage node.

func (*Service) FindStorageNodesForGracefulExit added in v1.4.1

func (service *Service) FindStorageNodesForGracefulExit(ctx context.Context, req FindStorageNodesRequest) (_ []*SelectedNode, err error)

FindStorageNodesForGracefulExit searches the overlay network for nodes that meet the provided requirements for graceful-exit requests.

The main difference between this method and the normal FindStorageNodes is that here we avoid using the cache.

func (*Service) FindStorageNodesForUpload added in v1.4.1

func (service *Service) FindStorageNodesForUpload(ctx context.Context, req FindStorageNodesRequest) (_ []*SelectedNode, err error)

FindStorageNodesForUpload searches the overlay network for nodes that meet the provided requirements for upload.

When enabled it uses the cache to select nodes. When the node selection from the cache fails, it falls back to the old implementation.

func (*Service) FindStorageNodesWithPreferences added in v0.17.0

func (service *Service) FindStorageNodesWithPreferences(ctx context.Context, req FindStorageNodesRequest, preferences *NodeSelectionConfig) (nodes []*SelectedNode, err error)

FindStorageNodesWithPreferences searches the overlay network for nodes that meet the provided criteria.

This does not use a cache.

func (*Service) Get added in v0.17.0

func (service *Service) Get(ctx context.Context, nodeID storj.NodeID) (_ *NodeDossier, err error)

Get looks up the provided nodeID from the overlay.

func (*Service) GetMissingPieces added in v0.17.0

func (service *Service) GetMissingPieces(ctx context.Context, pieces []*pb.RemotePiece) (missingPieces []int32, err error)

GetMissingPieces returns the list of missing pieces, i.e. pieces held by offline nodes.

func (*Service) GetOfflineNodesLimited added in v0.29.8

func (service *Service) GetOfflineNodesLimited(ctx context.Context, limit int) (offlineNodes []NodeLastContact, err error)

GetOfflineNodesLimited returns a list of the first N offline nodes ordered by least recently contacted.

func (*Service) GetOnlineNodesForGetDelete added in v1.1.1

func (service *Service) GetOnlineNodesForGetDelete(ctx context.Context, nodeIDs []storj.NodeID) (_ map[storj.NodeID]*SelectedNode, err error)

GetOnlineNodesForGetDelete returns a map of nodes for the supplied nodeIDs.

func (*Service) GetSuccesfulNodesNotCheckedInSince added in v0.29.0

func (service *Service) GetSuccesfulNodesNotCheckedInSince(ctx context.Context, duration time.Duration) (nodeLastContacts []NodeLastContact, err error)

GetSuccesfulNodesNotCheckedInSince returns all nodes whose last check-in was successful, but haven't checked-in within a given duration.

func (*Service) Inspect added in v0.17.0

func (service *Service) Inspect(ctx context.Context) (_ storage.Keys, err error)

Inspect lists limited number of items in the cache.

func (*Service) IsOnline added in v0.17.0

func (service *Service) IsOnline(node *NodeDossier) bool

IsOnline checks if a node is 'online' based on the collected statistics.

func (*Service) KnownOffline added in v0.17.0

func (service *Service) KnownOffline(ctx context.Context, nodeIds storj.NodeIDList) (offlineNodes storj.NodeIDList, err error)

KnownOffline filters a set of nodes to offline nodes.

func (*Service) KnownReliable added in v0.29.0

func (service *Service) KnownReliable(ctx context.Context, nodeIDs storj.NodeIDList) (nodes []*pb.Node, err error)

KnownReliable filters a set of nodes to reliable (online and qualified) nodes.

func (*Service) KnownUnreliableOrOffline added in v0.17.0

func (service *Service) KnownUnreliableOrOffline(ctx context.Context, nodeIds storj.NodeIDList) (badNodes storj.NodeIDList, err error)

KnownUnreliableOrOffline filters a set of nodes to unhealthy or offline nodes, independent of new.

func (*Service) Reliable added in v0.17.0

func (service *Service) Reliable(ctx context.Context) (nodes storj.NodeIDList, err error)

Reliable filters a set of nodes that are reliable, independent of new.

func (*Service) TestUnvetNode added in v1.13.1

func (service *Service) TestUnvetNode(ctx context.Context, nodeID storj.NodeID) (err error)

TestUnvetNode directly sets a node's vetted_at timestamp to null to make testing easier.

func (*Service) TestVetNode added in v1.13.1

func (service *Service) TestVetNode(ctx context.Context, nodeID storj.NodeID) (vettedTime *time.Time, err error)

TestVetNode directly sets a node's vetted_at timestamp to make testing easier.

func (*Service) UpdateCheckIn added in v0.21.3

func (service *Service) UpdateCheckIn(ctx context.Context, node NodeCheckInInfo, timestamp time.Time) (err error)

UpdateCheckIn updates a single storagenode's check-in info.

func (*Service) UpdateNodeInfo added in v0.17.0

func (service *Service) UpdateNodeInfo(ctx context.Context, node storj.NodeID, nodeInfo *InfoResponse) (stats *NodeDossier, err error)

UpdateNodeInfo updates node dossier with info requested from the node itself like node type, email, wallet, capacity, and version.

func (*Service) UpdateStats added in v0.17.0

func (service *Service) UpdateStats(ctx context.Context, request *UpdateRequest) (stats *NodeStats, err error)

UpdateStats updates all parts of a single storagenode's stats.

func (*Service) UpdateUptime added in v0.17.0

func (service *Service) UpdateUptime(ctx context.Context, nodeID storj.NodeID, isUp bool) (stats *NodeStats, err error)

UpdateUptime updates a single storagenode's uptime stats.

type UpdateRequest

// UpdateRequest is used to update a node status.
type UpdateRequest struct {
	NodeID       storj.NodeID
	// AuditOutcome is the audit result being reported (see AuditType).
	AuditOutcome AuditType
	IsUp         bool
	// n.b. these are set values from the satellite.
	// They are part of the UpdateRequest struct in order to be
	// more easily accessible in satellite/satellitedb/overlaycache.go.
	AuditLambda               float64
	AuditWeight               float64
	AuditDQ                   float64
	SuspensionGracePeriod     time.Duration
	SuspensionDQEnabled       bool
	AuditsRequiredForVetting  int64
	UptimesRequiredForVetting int64
	AuditHistory              AuditHistoryConfig
}

UpdateRequest is used to update a node status.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL