The highest tagged major version is v2.

chunk

package

v1.9.11-890996e78e342d... Latest Latest Go to latest Published: Feb 5, 2020 License: Apache-2.0 Imports: 20 Imported by: 2

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/pachyderm/pachyderm

Documentation ¶

Index ¶

Constants
Variables
func Cleanup(objC obj.Client, chunks *Storage)
func RandSeq(n int) []byte
type Annotation
type Chunk
- func (*Chunk) Descriptor() ([]byte, []int)
- func (m *Chunk) GetHash() string
- func (m *Chunk) Marshal() (dAtA []byte, err error)
- func (m *Chunk) MarshalTo(dAtA []byte) (int, error)
- func (m *Chunk) MarshalToSizedBuffer(dAtA []byte) (int, error)
- func (*Chunk) ProtoMessage()
- func (m *Chunk) Reset()
- func (m *Chunk) Size() (n int)
- func (m *Chunk) String() string
- func (m *Chunk) Unmarshal(dAtA []byte) error
- func (m *Chunk) XXX_DiscardUnknown()
- func (m *Chunk) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (m *Chunk) XXX_Merge(src proto.Message)
- func (m *Chunk) XXX_Size() int
- func (m *Chunk) XXX_Unmarshal(b []byte) error
type Copy
type DataRef
- func (*DataRef) Descriptor() ([]byte, []int)
- func (m *DataRef) GetChunk() *Chunk
- func (m *DataRef) GetHash() string
- func (m *DataRef) GetOffsetBytes() int64
- func (m *DataRef) GetSizeBytes() int64
- func (m *DataRef) Marshal() (dAtA []byte, err error)
- func (m *DataRef) MarshalTo(dAtA []byte) (int, error)
- func (m *DataRef) MarshalToSizedBuffer(dAtA []byte) (int, error)
- func (*DataRef) ProtoMessage()
- func (m *DataRef) Reset()
- func (m *DataRef) Size() (n int)
- func (m *DataRef) String() string
- func (m *DataRef) Unmarshal(dAtA []byte) error
- func (m *DataRef) XXX_DiscardUnknown()
- func (m *DataRef) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)
- func (m *DataRef) XXX_Merge(src proto.Message)
- func (m *DataRef) XXX_Size() int
- func (m *DataRef) XXX_Unmarshal(b []byte) error
type Reader
- func (r *Reader) Close() error
- func (r *Reader) Len() int64
- func (r *Reader) NextRange(dataRefs []*DataRef)
- func (r *Reader) OnSplit(f func())
- func (r *Reader) Read(data []byte) (int, error)
- func (r *Reader) ReadCopy(n ...int64) (*Copy, error)
type ReaderFunc
type Storage
- func LocalStorage(tb testing.TB) (obj.Client, *Storage)
- func NewStorage(objC obj.Client, opts ...StorageOption) *Storage
- func (s *Storage) Delete(ctx context.Context, hash string) error
- func (s *Storage) DeleteAll(ctx context.Context) error
- func (s *Storage) List(ctx context.Context, f func(string) error) error
- func (s *Storage) NewReader(ctx context.Context, f ...ReaderFunc) *Reader
- func (s *Storage) NewWriter(ctx context.Context, averageBits int, f WriterFunc, seed int64) *Writer
type StorageOption
- func ServiceEnvToOptions(env *serviceenv.ServiceEnv) []StorageOption
type Writer
- func (w *Writer) Annotate(a *Annotation)
- func (w *Writer) AnnotatedBytesSize() int64
- func (w *Writer) AnnotationCount() int64
- func (w *Writer) ChunkCount() int64
- func (w *Writer) Close() error
- func (w *Writer) Copy(r *Reader, n ...int64) error
- func (w *Writer) Flush() error
- func (w *Writer) Reset()
- func (w *Writer) Write(data []byte) (int, error)
- func (w *Writer) WriteCopy(c *Copy) error
type WriterFunc

Constants ¶

View Source

const (
	// MB is Megabytes.
	MB = 1024 * 1024
	// WindowSize is the size of the rolling hash window.
	WindowSize = 64
)

Variables ¶

View Source

var (
	ErrInvalidLengthChunk        = fmt.Errorf("proto: negative length found during unmarshaling")
	ErrIntOverflowChunk          = fmt.Errorf("proto: integer overflow")
	ErrUnexpectedEndOfGroupChunk = fmt.Errorf("proto: unexpected end of group")
)

Functions ¶

func Cleanup ¶

func Cleanup(objC obj.Client, chunks *Storage)

Cleanup cleans up a local chunk storage instance.

func RandSeq ¶

func RandSeq(n int) []byte

RandSeq generates a random sequence of data, where n is the number of bytes to generate.

Types ¶

type Annotation ¶

type Annotation struct {
	Offset      int64
	RefDataRefs []*DataRef
	NextDataRef *DataRef
	Meta        interface{}
}

Annotation is used to associate information with a set of bytes written into the chunk storage layer.

type Chunk ¶

type Chunk struct {
	Hash                 string   `protobuf:"bytes,1,opt,name=hash,proto3" json:"hash,omitempty"`
	XXX_NoUnkeyedLiteral struct{} `json:"-"`
	XXX_unrecognized     []byte   `json:"-"`
	XXX_sizecache        int32    `json:"-"`
}

func (*Chunk) Descriptor ¶

func (*Chunk) Descriptor() ([]byte, []int)

func (*Chunk) GetHash ¶

func (m *Chunk) GetHash() string

func (*Chunk) Marshal ¶

func (m *Chunk) Marshal() (dAtA []byte, err error)

func (*Chunk) MarshalTo ¶

func (m *Chunk) MarshalTo(dAtA []byte) (int, error)

func (*Chunk) MarshalToSizedBuffer ¶

func (m *Chunk) MarshalToSizedBuffer(dAtA []byte) (int, error)

func (*Chunk) ProtoMessage ¶

func (*Chunk) ProtoMessage()

func (*Chunk) Reset ¶

func (m *Chunk) Reset()

func (*Chunk) Size ¶

func (m *Chunk) Size() (n int)

func (*Chunk) String ¶

func (m *Chunk) String() string

func (*Chunk) Unmarshal ¶

func (m *Chunk) Unmarshal(dAtA []byte) error

func (*Chunk) XXX_DiscardUnknown ¶

func (m *Chunk) XXX_DiscardUnknown()

func (*Chunk) XXX_Marshal ¶

func (m *Chunk) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*Chunk) XXX_Merge ¶

func (m *Chunk) XXX_Merge(src proto.Message)

func (*Chunk) XXX_Size ¶

func (m *Chunk) XXX_Size() int

func (*Chunk) XXX_Unmarshal ¶

func (m *Chunk) XXX_Unmarshal(b []byte) error

type Copy ¶ added in v1.9.5

type Copy struct {
	// contains filtered or unexported fields
}

Copy is the basic data structure to represent a copy of data from a reader to a writer. Here, "before" and "after" refer to the raw bytes that precede and follow the full chunks in the set of bytes represented by the copy.

type DataRef ¶

type DataRef struct {
	// The chunk the referenced data is located in.
	Chunk *Chunk `protobuf:"bytes,1,opt,name=chunk,proto3" json:"chunk,omitempty"`
	// The hash of the data being referenced.
	// This field is empty when it is equal to the chunk hash (the ref is the whole chunk).
	Hash string `protobuf:"bytes,2,opt,name=hash,proto3" json:"hash,omitempty"`
	// The offset and size used for accessing the data within the chunk.
	OffsetBytes          int64    `protobuf:"varint,3,opt,name=offset_bytes,json=offsetBytes,proto3" json:"offset_bytes,omitempty"`
	SizeBytes            int64    `protobuf:"varint,4,opt,name=size_bytes,json=sizeBytes,proto3" json:"size_bytes,omitempty"`
	XXX_NoUnkeyedLiteral struct{} `json:"-"`
	XXX_unrecognized     []byte   `json:"-"`
	XXX_sizecache        int32    `json:"-"`
}

DataRef is a reference to data within a chunk.

func (*DataRef) Descriptor ¶

func (*DataRef) Descriptor() ([]byte, []int)

func (*DataRef) GetChunk ¶

func (m *DataRef) GetChunk() *Chunk

func (*DataRef) GetHash ¶

func (m *DataRef) GetHash() string

func (*DataRef) GetOffsetBytes ¶

func (m *DataRef) GetOffsetBytes() int64

func (*DataRef) GetSizeBytes ¶

func (m *DataRef) GetSizeBytes() int64

func (*DataRef) Marshal ¶

func (m *DataRef) Marshal() (dAtA []byte, err error)

func (*DataRef) MarshalTo ¶

func (m *DataRef) MarshalTo(dAtA []byte) (int, error)

func (*DataRef) MarshalToSizedBuffer ¶

func (m *DataRef) MarshalToSizedBuffer(dAtA []byte) (int, error)

func (*DataRef) ProtoMessage ¶

func (*DataRef) ProtoMessage()

func (*DataRef) Reset ¶

func (m *DataRef) Reset()

func (*DataRef) Size ¶

func (m *DataRef) Size() (n int)

func (*DataRef) String ¶

func (m *DataRef) String() string

func (*DataRef) Unmarshal ¶

func (m *DataRef) Unmarshal(dAtA []byte) error

func (*DataRef) XXX_DiscardUnknown ¶

func (m *DataRef) XXX_DiscardUnknown()

func (*DataRef) XXX_Marshal ¶

func (m *DataRef) XXX_Marshal(b []byte, deterministic bool) ([]byte, error)

func (*DataRef) XXX_Merge ¶

func (m *DataRef) XXX_Merge(src proto.Message)

func (*DataRef) XXX_Size ¶

func (m *DataRef) XXX_Size() int

func (*DataRef) XXX_Unmarshal ¶

func (m *DataRef) XXX_Unmarshal(b []byte) error

type Reader ¶

type Reader struct {
	// contains filtered or unexported fields
}

Reader reads a set of DataRefs from chunk storage.

func (*Reader) Close ¶

func (r *Reader) Close() error

Close closes the reader. Currently a no-op, but will be used when streaming is implemented.

func (*Reader) Len ¶

func (r *Reader) Len() int64

Len returns the number of bytes left.

func (*Reader) NextRange ¶

func (r *Reader) NextRange(dataRefs []*DataRef)

NextRange sets the next range for the reader.

func (*Reader) OnSplit ¶ added in v1.9.5

func (r *Reader) OnSplit(f func())

OnSplit registers a callback for when a chunk split point is encountered. The callback is only executed at a split point found after reading WindowSize bytes. The reason for this is to guarantee that the same split point will appear in the writer the data is being written to.

func (*Reader) Read ¶

func (r *Reader) Read(data []byte) (int, error)

Read reads from the byte stream produced by the set of DataRefs.

func (*Reader) ReadCopy ¶ added in v1.9.5

func (r *Reader) ReadCopy(n ...int64) (*Copy, error)

ReadCopy reads copy data from the reader.

type ReaderFunc ¶

type ReaderFunc func() ([]*DataRef, error)

ReaderFunc is a callback that returns the next set of data references to a reader.

type Storage ¶

type Storage struct {
	// contains filtered or unexported fields
}

Storage is the abstraction that manages chunk storage.

func LocalStorage ¶

func LocalStorage(tb testing.TB) (obj.Client, *Storage)

LocalStorage creates a local chunk storage instance. Useful for storage layer tests.

func NewStorage ¶

func NewStorage(objC obj.Client, opts ...StorageOption) *Storage

NewStorage creates a new Storage.

func (*Storage) Delete ¶ added in v1.9.5

func (s *Storage) Delete(ctx context.Context, hash string) error

Delete deletes a chunk in object storage.

func (*Storage) DeleteAll ¶

func (s *Storage) DeleteAll(ctx context.Context) error

DeleteAll deletes all of the chunks in object storage.

func (*Storage) List ¶

func (s *Storage) List(ctx context.Context, f func(string) error) error

List lists all of the chunks in object storage.

func (*Storage) NewReader ¶

func (s *Storage) NewReader(ctx context.Context, f ...ReaderFunc) *Reader

NewReader creates an io.ReadCloser for a chunk. TODO(bryce): The whole chunk is held in memory right now, which could be a problem with concurrency, particularly during the merge process. We may want to handle concurrency here (for example, by passing in multiple data refs).

func (*Storage) NewWriter ¶

func (s *Storage) NewWriter(ctx context.Context, averageBits int, f WriterFunc, seed int64) *Writer

NewWriter creates an io.WriteCloser for a stream of bytes to be chunked. Chunks are created based on the content, then hashed and deduplicated/uploaded to object storage. The callback f is invoked with a data reference to each chunk and the annotations within that chunk (see WriterFunc).

type StorageOption ¶ added in v1.9.8

type StorageOption func(s *Storage)

StorageOption configures a storage.

func ServiceEnvToOptions ¶ added in v1.9.8

func ServiceEnvToOptions(env *serviceenv.ServiceEnv) []StorageOption

ServiceEnvToOptions converts a service environment configuration (specifically the storage configuration) to a set of storage options.

type Writer ¶

type Writer struct {
	// contains filtered or unexported fields
}

Writer splits a byte stream into content-defined chunks that are hashed and deduplicated/uploaded to object storage. Chunk split points are determined by a bit pattern in a rolling hash function (buzhash64 at https://github.com/chmduquesne/rollinghash). The byte stream is split into byte sets for parallel processing. Workers roll the rolling hash function and execute the writer function on these byte sets. The workers are daisy-chained so that split points across byte sets can be resolved by shuffling bytes between workers in the chain, and so that the writer function is executed on the chunks in the order they appear in the byte stream.

func (*Writer) Annotate ¶

func (w *Writer) Annotate(a *Annotation)

Annotate associates an annotation with the current byte set.

func (*Writer) AnnotatedBytesSize ¶

func (w *Writer) AnnotatedBytesSize() int64

AnnotatedBytesSize returns the size of the bytes for the current annotation.

func (*Writer) AnnotationCount ¶ added in v1.9.8

func (w *Writer) AnnotationCount() int64

AnnotationCount returns a count of the number of annotations created/referenced by the writer.

func (*Writer) ChunkCount ¶

func (w *Writer) ChunkCount() int64

ChunkCount returns a count of the number of chunks created/referenced by the writer.

func (*Writer) Close ¶

func (w *Writer) Close() error

Close closes the writer.

func (*Writer) Copy ¶ added in v1.9.5

func (w *Writer) Copy(r *Reader, n ...int64) error

Copy does a cheap copy from a reader to a writer.

func (*Writer) Flush ¶ added in v1.9.5

func (w *Writer) Flush() error

Flush flushes the buffered data.

func (*Writer) Reset ¶ added in v1.9.5

func (w *Writer) Reset()

Reset resets the buffer and annotations.

func (*Writer) Write ¶

func (w *Writer) Write(data []byte) (int, error)

Write rolls through the data written, calling the writer function (WriterFunc) when a chunk is found. Note: If making changes to this function, be wary of the performance implications (check before and after performance with the chunker benchmarks).

func (*Writer) WriteCopy ¶ added in v1.9.5

func (w *Writer) WriteCopy(c *Copy) error

WriteCopy writes copy data to the writer.

type WriterFunc ¶

type WriterFunc func(*DataRef, []*Annotation) error

WriterFunc is a callback that receives a data reference to the next chunk and the annotations within that chunk.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL