Documentation ¶
Index ¶
- Constants
- Variables
- func BeforeBound(str string, strBound ...string) bool
- func Cleanup(objC obj.Client, chunks *Storage)
- func RandSeq(n int) []byte
- type Annotation
- type Chunk
- func (*Chunk) Descriptor() ([]byte, []int)
- func (m *Chunk) GetHash() string
- func (m *Chunk) Marshal() (dAtA []byte, err error)
- func (m *Chunk) MarshalTo(dAtA []byte) (int, error)
- func (m *Chunk) MarshalToSizedBuffer(dAtA []byte) (int, error)
- func (*Chunk) ProtoMessage()
- func (m *Chunk) Reset()
- func (m *Chunk) Size() (n int)
- func (m *Chunk) String() string
- func (m *Chunk) Unmarshal(dAtA []byte) error
- func (m *Chunk) XXX_DiscardUnknown()
- func (m *Chunk) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (m *Chunk) XXX_Merge(src proto.Message)
- func (m *Chunk) XXX_Size() int
- func (m *Chunk) XXX_Unmarshal(b []byte) error
- type ChunkInfo
- func (*ChunkInfo) Descriptor() ([]byte, []int)
- func (m *ChunkInfo) GetChunk() *Chunk
- func (m *ChunkInfo) GetEdge() bool
- func (m *ChunkInfo) GetSizeBytes() int64
- func (m *ChunkInfo) Marshal() (dAtA []byte, err error)
- func (m *ChunkInfo) MarshalTo(dAtA []byte) (int, error)
- func (m *ChunkInfo) MarshalToSizedBuffer(dAtA []byte) (int, error)
- func (*ChunkInfo) ProtoMessage()
- func (m *ChunkInfo) Reset()
- func (m *ChunkInfo) Size() (n int)
- func (m *ChunkInfo) String() string
- func (m *ChunkInfo) Unmarshal(dAtA []byte) error
- func (m *ChunkInfo) XXX_DiscardUnknown()
- func (m *ChunkInfo) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (m *ChunkInfo) XXX_Merge(src proto.Message)
- func (m *ChunkInfo) XXX_Size() int
- func (m *ChunkInfo) XXX_Unmarshal(b []byte) error
- type DataReader
- func (dr *DataReader) BoundReader(tagUpperBound ...string) *DataReader
- func (dr *DataReader) DataRef() *DataRef
- func (dr *DataReader) Get(w io.Writer) error
- func (dr *DataReader) Iterate(f func(*Tag, io.Reader) error, tagUpperBound ...string) error
- func (dr *DataReader) Len() int64
- func (dr *DataReader) PeekTag() (*Tag, error)
- func (dr *DataReader) PeekTags() ([]*Tag, error)
- type DataRef
- func (*DataRef) Descriptor() ([]byte, []int)
- func (m *DataRef) GetChunkInfo() *ChunkInfo
- func (m *DataRef) GetHash() string
- func (m *DataRef) GetOffsetBytes() int64
- func (m *DataRef) GetSizeBytes() int64
- func (m *DataRef) GetTags() []*Tag
- func (m *DataRef) Marshal() (dAtA []byte, err error)
- func (m *DataRef) MarshalTo(dAtA []byte) (int, error)
- func (m *DataRef) MarshalToSizedBuffer(dAtA []byte) (int, error)
- func (*DataRef) ProtoMessage()
- func (m *DataRef) Reset()
- func (m *DataRef) Size() (n int)
- func (m *DataRef) String() string
- func (m *DataRef) Unmarshal(dAtA []byte) error
- func (m *DataRef) XXX_DiscardUnknown()
- func (m *DataRef) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (m *DataRef) XXX_Merge(src proto.Message)
- func (m *DataRef) XXX_Size() int
- func (m *DataRef) XXX_Unmarshal(b []byte) error
- type Reader
- func (r *Reader) Get(w io.Writer) error
- func (r *Reader) Iterate(f func(*DataReader) error, tagUpperBound ...string) error
- func (r *Reader) Next() (*DataReader, error)
- func (r *Reader) NextDataRefs(dataRefs []*DataRef)
- func (r *Reader) NextTagReader() *TagReader
- func (r *Reader) Peek() (*DataReader, error)
- func (r *Reader) PeekTag() (*Tag, error)
- type Storage
- func (s *Storage) Delete(ctx context.Context, hash string) error
- func (s *Storage) DeleteAll(ctx context.Context) error
- func (s *Storage) List(ctx context.Context, f func(string) error) error
- func (s *Storage) NewReader(ctx context.Context, dataRefs ...*DataRef) *Reader
- func (s *Storage) NewWriter(ctx context.Context, averageBits int, seed int64, f WriterFunc) *Writer
- type StorageOption
- type Tag
- func (*Tag) Descriptor() ([]byte, []int)
- func (m *Tag) GetId() string
- func (m *Tag) GetSizeBytes() int64
- func (m *Tag) Marshal() (dAtA []byte, err error)
- func (m *Tag) MarshalTo(dAtA []byte) (int, error)
- func (m *Tag) MarshalToSizedBuffer(dAtA []byte) (int, error)
- func (*Tag) ProtoMessage()
- func (m *Tag) Reset()
- func (m *Tag) Size() (n int)
- func (m *Tag) String() string
- func (m *Tag) Unmarshal(dAtA []byte) error
- func (m *Tag) XXX_DiscardUnknown()
- func (m *Tag) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (m *Tag) XXX_Merge(src proto.Message)
- func (m *Tag) XXX_Size() int
- func (m *Tag) XXX_Unmarshal(b []byte) error
- type TagReader
- type Writer
- type WriterFunc
Constants ¶
const (
// KB is Kilobyte.
KB = 1024
// MB is Megabyte.
MB = 1024 * KB
)
const (
// WindowSize is the size of the rolling hash window.
WindowSize = 64
)
Variables ¶
Functions ¶
func BeforeBound ¶
BeforeBound checks if the passed in string is before the string bound (exclusive). The string bound is optional, so if no string bound is passed then it returns true.
Types ¶
type Annotation ¶
type Annotation struct {
RefDataRefs []*DataRef
NextDataRef *DataRef
Data interface{}
// contains filtered or unexported fields
}
Annotation is used to associate information with data written into the chunk storage layer.
type Chunk ¶
type Chunk struct {
Hash string `protobuf:"bytes,1,opt,name=hash,proto3" json:"hash,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (*Chunk) Descriptor ¶
func (*Chunk) ProtoMessage ¶
func (*Chunk) ProtoMessage()
func (*Chunk) XXX_DiscardUnknown ¶
func (m *Chunk) XXX_DiscardUnknown()
func (*Chunk) XXX_Marshal ¶
func (*Chunk) XXX_Unmarshal ¶
type ChunkInfo ¶
type ChunkInfo struct {
Chunk *Chunk `protobuf:"bytes,1,opt,name=chunk,proto3" json:"chunk,omitempty"`
SizeBytes int64 `protobuf:"varint,2,opt,name=size_bytes,json=sizeBytes,proto3" json:"size_bytes,omitempty"`
Edge bool `protobuf:"varint,3,opt,name=edge,proto3" json:"edge,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (*ChunkInfo) Descriptor ¶
func (*ChunkInfo) GetSizeBytes ¶
func (*ChunkInfo) MarshalToSizedBuffer ¶
func (*ChunkInfo) ProtoMessage ¶
func (*ChunkInfo) ProtoMessage()
func (*ChunkInfo) XXX_DiscardUnknown ¶
func (m *ChunkInfo) XXX_DiscardUnknown()
func (*ChunkInfo) XXX_Marshal ¶
func (*ChunkInfo) XXX_Unmarshal ¶
type DataReader ¶
type DataReader struct {
// contains filtered or unexported fields
}
DataReader is an abstraction that lazily reads data referenced by a data reference. The seed is set to avoid re-downloading a chunk that is shared between this data reference and the prior in a chain of data references.
func (*DataReader) BoundReader ¶
func (dr *DataReader) BoundReader(tagUpperBound ...string) *DataReader
BoundReader creates a new data reader that reads a subset of the remaining tags in the current data reader. This tag subset is determined by the optional parameter tagUpperBound which specifies the upper bound (exclusive) of the new data reader. BoundReader will progress the current data reader past the tags in the tag subset. Data in the tag subset is fetched lazily by the new data reader.
func (*DataReader) DataRef ¶
func (dr *DataReader) DataRef() *DataRef
DataRef returns the data reference associated with this data reader.
func (*DataReader) Get ¶
func (dr *DataReader) Get(w io.Writer) error
Get writes the referenced data. This does not take into account the reading of tags.
func (*DataReader) Iterate ¶
Iterate iterates over the tags in the data reference and passes the tag and a reader for getting the tagged data to the callback function. tagUpperBound is an optional parameter for specifying the upper bound (exclusive) of the iteration.
func (*DataReader) Len ¶
func (dr *DataReader) Len() int64
Len returns the length of the remaining data to be read.
func (*DataReader) PeekTag ¶
func (dr *DataReader) PeekTag() (*Tag, error)
PeekTag returns the next tag without progressing the reader.
func (*DataReader) PeekTags ¶
func (dr *DataReader) PeekTags() ([]*Tag, error)
PeekTags returns the tags left in the reader without progressing the reader.
type DataRef ¶
type DataRef struct {
// The chunk the referenced data is located in.
ChunkInfo *ChunkInfo `protobuf:"bytes,1,opt,name=chunk_info,json=chunkInfo,proto3" json:"chunk_info,omitempty"`
// The hash of the data being referenced.
// This field is empty when it is equal to the chunk hash (the ref is the whole chunk).
Hash string `protobuf:"bytes,2,opt,name=hash,proto3" json:"hash,omitempty"`
// The offset and size used for accessing the data within the chunk.
OffsetBytes int64 `protobuf:"varint,3,opt,name=offset_bytes,json=offsetBytes,proto3" json:"offset_bytes,omitempty"`
SizeBytes int64 `protobuf:"varint,4,opt,name=size_bytes,json=sizeBytes,proto3" json:"size_bytes,omitempty"`
Tags []*Tag `protobuf:"bytes,5,rep,name=tags,proto3" json:"tags,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
DataRef is a reference to data within a chunk.
func Reference ¶
Reference creates a data reference for the full chunk referenced by a data reference.
func (*DataRef) Descriptor ¶
func (*DataRef) GetChunkInfo ¶
func (*DataRef) GetOffsetBytes ¶
func (*DataRef) GetSizeBytes ¶
func (*DataRef) MarshalToSizedBuffer ¶
func (*DataRef) ProtoMessage ¶
func (*DataRef) ProtoMessage()
func (*DataRef) XXX_DiscardUnknown ¶
func (m *DataRef) XXX_DiscardUnknown()
func (*DataRef) XXX_Marshal ¶
func (*DataRef) XXX_Unmarshal ¶
type Reader ¶
type Reader struct {
// contains filtered or unexported fields
}
Reader reads data from chunk storage.
func (*Reader) Get ¶
Get writes the concatenation of the data represented by the data references set in the reader. TODO(bryce): decide whether this should be blocked for a reader that has already been partially iterated.
func (*Reader) Iterate ¶
func (r *Reader) Iterate(f func(*DataReader) error, tagUpperBound ...string) error
Iterate iterates over the data readers for the current data references set in the reader. tagUpperBound is an optional parameter for specifying the upper bound (exclusive) of the iteration.
func (*Reader) Next ¶
func (r *Reader) Next() (*DataReader, error)
Next returns the next data reader and progresses the reader.
func (*Reader) NextDataRefs ¶
NextDataRefs sets the next data references for the reader.
func (*Reader) NextTagReader ¶
NextTagReader sets up a tag reader for the next tagged data.
func (*Reader) Peek ¶
func (r *Reader) Peek() (*DataReader, error)
Peek returns the next data reader without progressing the reader.
type Storage ¶
type Storage struct {
// contains filtered or unexported fields
}
Storage is the abstraction that manages chunk storage.
func LocalStorage ¶
LocalStorage creates a local chunk storage instance. Useful for storage layer tests.
func NewStorage ¶
func NewStorage(objC obj.Client, opts ...StorageOption) *Storage
NewStorage creates a new Storage.
type StorageOption ¶ added in v1.9.8
type StorageOption func(s *Storage)
StorageOption configures a storage.
func ServiceEnvToOptions ¶ added in v1.9.8
func ServiceEnvToOptions(env *serviceenv.ServiceEnv) []StorageOption
ServiceEnvToOptions converts a service environment configuration (specifically the storage configuration) to a set of storage options.
type Tag ¶
type Tag struct {
Id string `protobuf:"bytes,1,opt,name=id,proto3" json:"id,omitempty"`
SizeBytes int64 `protobuf:"varint,2,opt,name=size_bytes,json=sizeBytes,proto3" json:"size_bytes,omitempty"`
XXX_NoUnkeyedLiteral struct{} `json:"-"`
XXX_unrecognized []byte `json:"-"`
XXX_sizecache int32 `json:"-"`
}
func (*Tag) Descriptor ¶
func (*Tag) GetSizeBytes ¶
func (*Tag) ProtoMessage ¶
func (*Tag) ProtoMessage()
func (*Tag) XXX_DiscardUnknown ¶
func (m *Tag) XXX_DiscardUnknown()
func (*Tag) XXX_Unmarshal ¶
type TagReader ¶
type TagReader struct {
// contains filtered or unexported fields
}
TagReader is an abstraction for reading tagged data. A tag may span multiple data readers, so this abstraction will connect the data readers and bound them appropriately to retrieve the content for a specific tag.
type Writer ¶
type Writer struct {
// contains filtered or unexported fields
}
Writer splits a byte stream into content-defined chunks that are hashed and deduplicated/uploaded to object storage. Chunk split points are determined by a bit pattern in a rolling hash function (buzhash64 at https://github.com/chmduquesne/rollinghash). The byte stream is split into byte sets for parallel processing. Workers roll the rolling hash function and perform the execution of the writer function on these byte sets. The workers are daisy-chained such that split points across byte sets can be resolved by shuffling bytes between workers in the chain. The writer function is executed on the sequential ordering of the chunks in the byte stream.
func (*Writer) Annotate ¶
func (w *Writer) Annotate(a *Annotation)
Annotate associates an annotation with the current data.
func (*Writer) AnnotationCount ¶ added in v1.9.8
AnnotationCount returns a count of the number of annotations created/referenced by the writer.
func (*Writer) ChunkCount ¶
ChunkCount returns a count of the number of chunks created/referenced by the writer.
func (*Writer) Copy ¶ added in v1.9.5
func (w *Writer) Copy(dr *DataReader) error
Copy copies data from a data reader to the writer. The copy will either be by reading the referenced data, or just copying the data reference (cheap copy).
type WriterFunc ¶
type WriterFunc func([]*Annotation) error
WriterFunc is a callback that receives the annotations within a chunk.