retriever

package
v0.23.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 17, 2024 License: Apache-2.0, MIT Imports: 44 Imported by: 3

Documentation

Index

Constants

View Source
const BufferWindow = 5 * time.Millisecond
View Source
const GraphsyncDefaultInitialWait = 2 * time.Millisecond

Connect() may be a near-noop for already-connected libp2p connections, so this allows parallel goroutines for already-connected peers to queue and lets the scoring logic select one to start.

View Source
const HttpDefaultInitialWait time.Duration = 2 * time.Millisecond

Connect() is currently a noop, so this simply allows parallel goroutines to queue and the scoring logic to select one to start.

Variables

View Source
var (
	ErrHttpSelectorRequest = errors.New("HTTP retrieval for an explicit selector request")
	ErrNoHttpForPeer       = errors.New("no HTTP url for peer")
	ErrBadPathForRequest   = errors.New("bad path for request")
)
View Source
var (
	ErrRetrieverNotStarted         = errors.New("retriever not started")
	ErrDealProposalFailed          = errors.New("deal proposal failed")
	ErrNoCandidates                = errors.New("no candidates")
	ErrUnexpectedRetrieval         = errors.New("unexpected active retrieval")
	ErrHitRetrievalLimit           = errors.New("hit retrieval limit")
	ErrProposalCreationFailed      = errors.New("proposal creation failed")
	ErrRetrievalRegistrationFailed = errors.New("retrieval registration failed")
	ErrRetrievalFailed             = errors.New("retrieval failed")
	ErrAllRetrievalsFailed         = errors.New("all retrievals failed")
	ErrConnectFailed               = errors.New("unable to connect to provider")
	ErrAllQueriesFailed            = errors.New("all queries failed")
	ErrRetrievalTimedOut           = errors.New("retrieval timed out")
	ErrRetrievalAlreadyRunning     = errors.New("retrieval already running for CID")
)

Functions

func NewGraphsyncRetriever added in v0.7.0

func NewGraphsyncRetriever(session Session, client GraphsyncClient) types.CandidateRetriever

NewGraphsyncRetriever makes a new CandidateRetriever for Graphsync retrievals (transport-graphsync-filecoinv1).

func NewGraphsyncRetrieverWithConfig added in v0.10.0

func NewGraphsyncRetrieverWithConfig(
	session Session,
	client GraphsyncClient,
	clock clock.Clock,
	initialPause time.Duration,
	noDirtyClose bool,
) types.CandidateRetriever

func NewHttpRetriever added in v0.10.0

func NewHttpRetriever(session Session, client *http.Client) types.CandidateRetriever

NewHttpRetriever makes a new CandidateRetriever for verified CAR HTTP retrievals (transport-ipfs-gateway-http).

func NewHttpRetrieverWithDeps added in v0.10.0

func NewHttpRetrieverWithDeps(
	session Session,
	client *http.Client,
	clock clock.Clock,
	awaitReceivedCandidates chan<- struct{},
	initialPause time.Duration,
	noDirtyClose bool,
) types.CandidateRetriever

func NewProtocolSplitter added in v0.4.0

func NewProtocolSplitter(protocols []multicodec.Code) types.CandidateSplitter[multicodec.Code]

func RetrievalProposalForAsk

func RetrievalProposalForAsk(ask *retrievaltypes.QueryResponse, c cid.Cid, selector ipld.Node) (*retrievaltypes.DealProposal, error)

Types

type AssignableCandidateFinder added in v0.4.0

type AssignableCandidateFinder struct {
	// contains filtered or unexported fields
}

AssignableCandidateFinder finds and filters candidates for a given retrieval

func NewAssignableCandidateFinder added in v0.4.0

func NewAssignableCandidateFinder(candidateSource types.CandidateSource, filterIndexerCandidate FilterIndexerCandidate) AssignableCandidateFinder

func NewAssignableCandidateFinderWithClock added in v0.6.0

func NewAssignableCandidateFinderWithClock(candidateSource types.CandidateSource, filterIndexerCandidate FilterIndexerCandidate, clock clock.Clock) AssignableCandidateFinder

func (AssignableCandidateFinder) FindCandidates added in v0.4.0

func (acf AssignableCandidateFinder) FindCandidates(ctx context.Context, request types.RetrievalRequest, eventsCallback func(types.RetrievalEvent), onCandidates func([]types.RetrievalCandidate)) error

type BitswapConfig added in v0.4.0

type BitswapConfig struct {
	BlockTimeout            time.Duration
	Concurrency             int
	ConcurrencyPerRetrieval int
}

BitswapConfig contains configurable parameters for bitswap fetching

type BitswapRetriever added in v0.4.0

type BitswapRetriever struct {
	// contains filtered or unexported fields
}

BitswapRetriever uses bitswap to retrieve data. BitswapRetriever retrieves using a combination of a go-bitswap client specially configured per retrieval, underneath a blockservice and a go-fetcher Fetcher. Selectors are used to traverse the DAG to make sure the CARs for bitswap match graphsync. Note: this is a tradeoff over go-merkledag for traversal, because selector execution is slow. But the solution is to improve selector execution, not introduce unpredictable encoding.

func NewBitswapRetrieverFromDeps added in v0.4.0

func NewBitswapRetrieverFromDeps(
	ctx context.Context,
	bsrv blockservice.BlockService,
	routing IndexerRouting,
	inProgressCids InProgressCids,
	bstore MultiBlockstore,
	cfg BitswapConfig,
	clock clock.Clock,
	awaitReceivedCandidates chan<- struct{},
) *BitswapRetriever

NewBitswapRetrieverFromDeps is primarily for testing, constructing behavior from direct dependencies

func NewBitswapRetrieverFromHost added in v0.4.0

func NewBitswapRetrieverFromHost(
	ctx context.Context,
	host host.Host,
	cfg BitswapConfig,
) *BitswapRetriever

NewBitswapRetrieverFromHost constructs a new bitswap retriever for the given libp2p host

func (*BitswapRetriever) Retrieve added in v0.4.0

Retrieve initializes a new bitswap session

type DirectCandidateSource added in v0.22.0

type DirectCandidateSource struct {
	// contains filtered or unexported fields
}

DirectCandidateSource finds candidate protocols from a fixed set of peers

func NewDirectCandidateSource added in v0.22.0

func NewDirectCandidateSource(providers []types.Provider, opts ...Option) *DirectCandidateSource

NewDirectCandidateSource returns a new DirectCandidateSource for the given providers.

func (*DirectCandidateSource) FindCandidates added in v0.22.0

func (d *DirectCandidateSource) FindCandidates(ctx context.Context, c cid.Cid, cb func(types.RetrievalCandidate)) error

FindCandidates finds supported protocols for each peer. TODO: Cache the results?

type ErrHttpRequestFailure added in v0.11.0

type ErrHttpRequestFailure struct {
	Code int
}

func (ErrHttpRequestFailure) Error added in v0.11.0

func (e ErrHttpRequestFailure) Error() string

type FilterIndexerCandidate added in v0.6.6

type FilterIndexerCandidate func(types.RetrievalCandidate) (bool, types.RetrievalCandidate)

type GetStorageProviderTimeout

type GetStorageProviderTimeout func(peer peer.ID) time.Duration

type GraphsyncClient added in v0.10.0

type GraphsyncClient interface {
	Connect(ctx context.Context, peerAddr peer.AddrInfo) error
	RetrieveFromPeer(
		ctx context.Context,
		linkSystem ipld.LinkSystem,
		peerID peer.ID,
		proposal *retrievaltypes.DealProposal,
		selector ipld.Node,
		maxLinks uint64,
		eventsCallback datatransfer.Subscriber,
		gracefulShutdownRequested <-chan struct{},
	) (*types.RetrievalStats, error)
}

type InProgressCids added in v0.4.0

type InProgressCids interface {
	Inc(cid.Cid, types.RetrievalID)
	Dec(cid.Cid, types.RetrievalID)
}

type IndexerRouting added in v0.4.0

type IndexerRouting interface {
	AddProviders(types.RetrievalID, []types.RetrievalCandidate)
	RemoveProviders(types.RetrievalID)
}

IndexerRouting are the required methods to track indexer routing

type MultiBlockstore added in v0.4.0

type MultiBlockstore interface {
	AddLinkSystem(id types.RetrievalID, lsys *linking.LinkSystem) error
	RemoveLinkSystem(id types.RetrievalID)
}

MultiBlockstore are the required methods to track link systems

type Option added in v0.22.0

type Option func(*DirectCandidateSource)

func WithLibp2pCandidateDiscovery added in v0.22.0

func WithLibp2pCandidateDiscovery(h host.Host) Option

WithLibp2pCandidateDiscovery sets a libp2p Host for the DirectCandidateSource. If a Host is set, the providers will be queried to discover available protocols; otherwise, all protocols will be assumed by default.

type ProtocolGraphsync added in v0.10.0

type ProtocolGraphsync struct {
	Client GraphsyncClient
	Clock  clock.Clock
}

func (ProtocolGraphsync) Code added in v0.10.0

func (pg ProtocolGraphsync) Code() multicodec.Code

func (*ProtocolGraphsync) Connect added in v0.10.0

func (pg *ProtocolGraphsync) Connect(ctx context.Context, retrieval *retrieval, candidate types.RetrievalCandidate) (time.Duration, error)

func (ProtocolGraphsync) GetMergedMetadata added in v0.10.0

func (pg ProtocolGraphsync) GetMergedMetadata(cid cid.Cid, currentMetadata, newMetadata metadata.Protocol) metadata.Protocol

func (*ProtocolGraphsync) Retrieve added in v0.10.0

func (pg *ProtocolGraphsync) Retrieve(
	ctx context.Context,
	retrieval *retrieval,
	shared *retrievalShared,
	timeout time.Duration,
	candidate types.RetrievalCandidate,
) (*types.RetrievalStats, error)

type ProtocolHttp added in v0.10.0

type ProtocolHttp struct {
	Client *http.Client
	Clock  clock.Clock
}

func (ProtocolHttp) Code added in v0.10.0

func (ph ProtocolHttp) Code() multicodec.Code

func (*ProtocolHttp) Connect added in v0.10.0

func (ph *ProtocolHttp) Connect(ctx context.Context, retrieval *retrieval, candidate types.RetrievalCandidate) (time.Duration, error)

func (ProtocolHttp) GetMergedMetadata added in v0.10.0

func (ph ProtocolHttp) GetMergedMetadata(cid cid.Cid, currentMetadata, newMetadata metadata.Protocol) metadata.Protocol

func (*ProtocolHttp) Retrieve added in v0.10.0

func (ph *ProtocolHttp) Retrieve(
	ctx context.Context,
	retrieval *retrieval,
	shared *retrievalShared,
	timeout time.Duration,
	candidate types.RetrievalCandidate,
) (*types.RetrievalStats, error)

type ProtocolSplitter added in v0.4.0

type ProtocolSplitter struct {
	// contains filtered or unexported fields
}

func (*ProtocolSplitter) SplitRetrievalRequest added in v0.6.0

func (ps *ProtocolSplitter) SplitRetrievalRequest(ctx context.Context, request types.RetrievalRequest, events func(types.RetrievalEvent)) types.RetrievalSplitter[multicodec.Code]

type Retriever

type Retriever struct {
	// contains filtered or unexported fields
}

func NewRetriever

func NewRetriever(
	ctx context.Context,
	session Session,
	candidateSource types.CandidateSource,
	protocolRetrievers map[multicodec.Code]types.CandidateRetriever,
) (*Retriever, error)

func NewRetrieverWithClock added in v0.7.0

func NewRetrieverWithClock(
	ctx context.Context,
	session Session,
	candidateSource types.CandidateSource,
	protocolRetrievers map[multicodec.Code]types.CandidateRetriever,
	clock clock.Clock,
) (*Retriever, error)

func (*Retriever) RegisterSubscriber added in v0.3.0

func (retriever *Retriever) RegisterSubscriber(subscriber types.RetrievalEventSubscriber) func()

RegisterSubscriber registers a subscriber to receive all events fired during the process of making a retrieval, including the process of querying available storage providers to find compatible ones to attempt retrieval from.

func (*Retriever) Retrieve

func (retriever *Retriever) Retrieve(
	ctx context.Context,
	request types.RetrievalRequest,
	eventsCB func(types.RetrievalEvent),
) (*types.RetrievalStats, error)

Retrieve attempts to retrieve the given CID using the configured CandidateSource to find storage providers that should have the CID.

func (*Retriever) Start added in v0.3.0

func (retriever *Retriever) Start()

Start will start the retriever events system

func (*Retriever) Stop added in v0.3.0

func (retriever *Retriever) Stop() chan struct{}

Stop will stop the retriever events system and return a channel that will be closed when shutdown has completed

type Session added in v0.6.8

type Session interface {
	GetStorageProviderTimeout(storageProviderId peer.ID) time.Duration
	FilterIndexerCandidate(candidate types.RetrievalCandidate) (bool, types.RetrievalCandidate)

	RegisterRetrieval(retrievalId types.RetrievalID, cid cid.Cid, selector datamodel.Node) bool
	AddToRetrieval(retrievalId types.RetrievalID, storageProviderIds []peer.ID) error
	EndRetrieval(retrievalId types.RetrievalID) error

	RecordConnectTime(storageProviderId peer.ID, connectTime time.Duration)
	RecordFirstByteTime(storageProviderId peer.ID, firstByteTime time.Duration)
	RecordFailure(retrievalId types.RetrievalID, storageProviderId peer.ID) error
	RecordSuccess(storageProviderId peer.ID, bandwidthBytesPerSecond uint64)

	ChooseNextProvider(peers []peer.ID, metadata []metadata.Protocol) int
}

type TimeCounter added in v0.6.4

type TimeCounter struct {
	// contains filtered or unexported fields
}

TimeCounter is used to generate a monotonically increasing sequence. It starts at the current time, then increments on each call to Next.

func NewTimeCounter added in v0.6.4

func NewTimeCounter() *TimeCounter

func (*TimeCounter) Next added in v0.6.4

func (tc *TimeCounter) Next() uint64

type TransportProtocol added in v0.10.0

type TransportProtocol interface {
	Code() multicodec.Code
	GetMergedMetadata(cid cid.Cid, currentMetadata, newMetadata metadata.Protocol) metadata.Protocol
	Connect(ctx context.Context, retrieval *retrieval, candidate types.RetrievalCandidate) (time.Duration, error)
	Retrieve(
		ctx context.Context,
		retrieval *retrieval,
		shared *retrievalShared,
		timeout time.Duration,
		candidate types.RetrievalCandidate,
	) (*types.RetrievalStats, error)
}

TransportProtocol implements the protocol-specific portions of a parallel-peer retriever. It is responsible for communicating with individual peers and also bears responsibility for some of the peer-selection logic.

Directories

Path Synopsis
Package combinators contains tools to put various retrieval components together to make full retrievers
Package combinators contains tools to put various retrieval components together to make full retrievers
Package coordinators contains retrievers that coordinate multiple child retrievers
Package coordinators contains retrievers that coordinate multiple child retrievers
Package prioritywaitqueue implements a blocking queue for prioritised coordination of goroutine execution.
Package prioritywaitqueue implements a blocking queue for prioritised coordination of goroutine execution.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL