bytesref

package

v0.0.0-...-80fb460 Latest Latest Go to latest Published: Nov 24, 2024 License: Apache-2.0 Imports: 14 Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/geange/lucene-go

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func BytesToString(values []byte) string
func StringToBytes(value string) ([]byte, error)
type Allocator
type AllocatorBuilder
- func GetAllocatorBuilder() *AllocatorBuilder
- func (b *AllocatorBuilder) NewBytes(blockSize int, fn func(blocks [][]byte, start, end int)) Allocator
- func (b *AllocatorBuilder) NewDirect(blockSize int) Allocator
- func (b *AllocatorBuilder) NewRecyclingByteBlock(blockSize, maxBufferedBlocks int) Allocator
type Array
- func NewArray(bytesUsed int64) *Array
- func (r *Array) Append(bytes []byte) int
- func (r *Array) Clear()
- func (r *Array) Get(spare *Builder, index int) []byte
- func (r *Array) Iterator() BytesIterator
- func (r *Array) Size() int
type BlockPool
- func NewBlockPool(allocator Allocator) *BlockPool
- func (r *BlockPool) AllocSlice(slice []byte, upto int) int
- func (r *BlockPool) Append(bytes []byte)
- func (r *BlockPool) ByteOffset() int
- func (r *BlockPool) ByteUpto() int
- func (r *BlockPool) Current() []byte
- func (r *BlockPool) Get(index int) []byte
- func (r *BlockPool) GetAddress(offset uint32) ([]byte, error)
- func (r *BlockPool) GetBytes(textStart uint32) []byte
- func (r *BlockPool) NewSlice(size int) int
- func (r *BlockPool) NextBuffer()
- func (r *BlockPool) ReadBytes(offset int, bytes []byte, bytesOffset, bytesLength int)
- func (r *BlockPool) Reset(zeroFillBuffers, reuseFirst bool)
- func (r *BlockPool) SetBytesRefV1(builder *Builder, result []byte, offset, length int)
type Builder
- func NewBytesRefBuilder() *Builder
- func (r *Builder) AppendBuilder(builder *Builder)
- func (r *Builder) AppendByte(b byte)
- func (r *Builder) AppendBytes(b []byte)
- func (r *Builder) ByteAt(offset int) byte
- func (r *Builder) Bytes() []byte
- func (r *Builder) Clear()
- func (r *Builder) CopyBytes(b []byte, off, length int)
- func (r *Builder) CopyBytesBuilder(builder *Builder)
- func (r *Builder) Get() []byte
- func (r *Builder) Grow(capacity int)
- func (r *Builder) Length() int
- func (r *Builder) SetByteAt(offset int, b byte)
- func (r *Builder) SetLength(length int)
type BytesHash
- func NewBytesHash(pool *BlockPool, options ...BytesHashOption) (*BytesHash, error)
- func (r *BytesHash) Add(bs []byte) (int, error)
- func (r *BytesHash) AddByPoolOffset(offset uint32) int
- func (r *BytesHash) ByteStart(bytesID int) uint32
- func (r *BytesHash) Clear(resetPool bool)
- func (r *BytesHash) Close()
- func (r *BytesHash) Compact() []int
- func (r *BytesHash) Find(bytes []byte) int
- func (r *BytesHash) Get(id int) []byte
- func (r *BytesHash) ReInit()
- func (r *BytesHash) Size() int
- func (r *BytesHash) Sort() []int
type BytesHashOption
- func WithCapacity(capacity int) BytesHashOption
- func WithHash32(hasher hash.Hash32) BytesHashOption
- func WithStartArray(startArray StartArray) BytesHashOption
type BytesIterator
type DirectStartArray
- func NewDirectStartArray(initSize int) *DirectStartArray
- func (d *DirectStartArray) Clear() []uint32
- func (d *DirectStartArray) Grow() []uint32
- func (d *DirectStartArray) Init() []uint32
type DirectTrackingAllocator
type SortState
type StartArray

Constants ¶

View Source

const (
	BYTE_BLOCK_SHIFT = 15
	BYTE_BLOCK_SIZE  = 1 << BYTE_BLOCK_SHIFT
	BYTE_BLOCK_MASK  = BYTE_BLOCK_SIZE - 1
)

View Source

const (
	DEFAULT_BUFFERED_BLOCKS = 64
)

View Source

const (
	DefaultCapacity = 16
)

Variables ¶

View Source

var (
	// NEXT_LEVEL_ARRAY An array holding the offset into the LEVEL_SIZE_ARRAY to quickly navigate to the next slice level.
	NEXT_LEVEL_ARRAY = []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 9}

	// LEVEL_SIZE_ARRAY An array holding the level sizes for byte slices.
	LEVEL_SIZE_ARRAY = []int{5, 14, 20, 30, 40, 40, 80, 80, 120, 200}

	// FIRST_LEVEL_SIZE The first level size for new slices
	// See Also: NewSlice(int)
	FIRST_LEVEL_SIZE = LEVEL_SIZE_ARRAY[0]
)

Size of each slice. These arrays should be at most 16 elements (index is encoded with 4 bits). The first array is just a compact way to encode X+1 with a max. The second array is the length of each slice, i.e. the first slice is 5 bytes, the next slice is 14 bytes, etc.

View Source

var (
	EMPTY_BYTES []byte
)

View Source

var (
	GOOD_FAST_HASH_SEED = time.Now().Unix()
)

Functions ¶

func BytesToString ¶

func BytesToString(values []byte) string

func StringToBytes ¶

func StringToBytes(value string) ([]byte, error)

Types ¶

type Allocator ¶

type Allocator interface {
	RecycleByteBlocks(blocks [][]byte, start, end int)
	GetByteBlock() []byte
}

Allocator Abstract class for allocating and freeing byte blocks.

type AllocatorBuilder ¶

type AllocatorBuilder struct {
}

func GetAllocatorBuilder ¶

func GetAllocatorBuilder() *AllocatorBuilder

func (*AllocatorBuilder) NewBytes ¶

func (b *AllocatorBuilder) NewBytes(blockSize int, fn func(blocks [][]byte, start, end int)) Allocator

func (*AllocatorBuilder) NewDirect ¶

func (b *AllocatorBuilder) NewDirect(blockSize int) Allocator

func (*AllocatorBuilder) NewRecyclingByteBlock ¶

func (b *AllocatorBuilder) NewRecyclingByteBlock(blockSize, maxBufferedBlocks int) Allocator

type Array ¶

type Array struct {
	// contains filtered or unexported fields
}

Array A simple append-only, random-access BytesRef array that stores full copies of the appended []byte in a BlockPool. Note: This type is not thread-safe!

func NewArray ¶

func NewArray(bytesUsed int64) *Array

func (*Array) Append ¶

func (r *Array) Append(bytes []byte) int

func (*Array) Clear ¶

func (r *Array) Clear()

func (*Array) Get ¶

func (r *Array) Get(spare *Builder, index int) []byte

func (*Array) Iterator ¶

func (r *Array) Iterator() BytesIterator

func (*Array) Size ¶

func (r *Array) Size() int

type BlockPool ¶

type BlockPool struct {
	// contains filtered or unexported fields
}

BlockPool that Posting and PostingVector use to write byte streams into shared fixed-size byte[] arrays. The idea is to allocate slices of increasing lengths. For example, the first slice is 5 bytes, the next slice is 14, etc. We start by writing our bytes into the first 5 bytes. When we hit the end of the slice, we allocate the next slice and then write the address of the new slice into the last 4 bytes of the previous slice (the "forwarding address"). Each slice is filled with 0's initially, and we mark the end with a non-zero byte. This way the methods that are writing into the slice don't need to record its length and instead allocate a new slice once they hit a non-zero byte.

func NewBlockPool ¶

func NewBlockPool(allocator Allocator) *BlockPool

func (*BlockPool) AllocSlice ¶

func (r *BlockPool) AllocSlice(slice []byte, upto int) int

AllocSlice Creates a new byte slice with the given starting size and returns the slice's offset in the pool.

func (*BlockPool) Append ¶

func (r *BlockPool) Append(bytes []byte)

Append Appends the bytes in the provided BytesRef at the current position.

func (*BlockPool) ByteOffset ¶

func (r *BlockPool) ByteOffset() int

func (*BlockPool) ByteUpto ¶

func (r *BlockPool) ByteUpto() int

func (*BlockPool) Current ¶

func (r *BlockPool) Current() []byte

func (*BlockPool) Get ¶

func (r *BlockPool) Get(index int) []byte

func (*BlockPool) GetAddress ¶

func (r *BlockPool) GetAddress(offset uint32) ([]byte, error)

func (*BlockPool) GetBytes ¶

func (r *BlockPool) GetBytes(textStart uint32) []byte

func (*BlockPool) NewSlice ¶

func (r *BlockPool) NewSlice(size int) int

NewSlice Allocates a new slice with the given size. See Also: FIRST_LEVEL_SIZE

func (*BlockPool) NextBuffer ¶

func (r *BlockPool) NextBuffer()

NextBuffer Advances the pool to its next buffer. This method should be called once after the constructor to initialize the pool. In contrast to the constructor a reset() call will advance the pool to its first buffer immediately.

func (*BlockPool) ReadBytes ¶

func (r *BlockPool) ReadBytes(offset int, bytes []byte, bytesOffset, bytesLength int)

ReadBytes Reads bytes out of the pool starting at the given offset with the given length into the given byte array at offset bytesOffset. Note: this method can copy across block boundaries.

func (*BlockPool) Reset ¶

func (r *BlockPool) Reset(zeroFillBuffers, reuseFirst bool)

Reset Expert: Resets the pool to its initial state, reusing the first buffer. Calling nextBuffer() is not needed after reset. zeroFillBuffers: if true, the buffers are filled with 0; this should be set to true if this pool is used with slices. reuseFirst: if true, the first buffer will be reused and calling nextBuffer() is not needed after reset, provided the block pool was used before, i.e. nextBuffer() was called before.

func (*BlockPool) SetBytesRefV1 ¶

func (r *BlockPool) SetBytesRefV1(builder *Builder, result []byte, offset, length int)

SetBytesRefV1 Fill the provided BytesRef with the bytes at the specified offset/length slice. This will avoid copying the bytes, if the slice fits into a single block; otherwise, it uses the provided Builder to copy bytes over.

type Builder ¶

type Builder struct {
	// contains filtered or unexported fields
}

Builder A builder for BytesRef instances.

func NewBytesRefBuilder ¶

func NewBytesRefBuilder() *Builder

NewBytesRefBuilder Sole constructor.

func (*Builder) AppendBuilder ¶

func (r *Builder) AppendBuilder(builder *Builder)

AppendBuilder Append the provided bytes to this builder.

func (*Builder) AppendByte ¶

func (r *Builder) AppendByte(b byte)

AppendByte Append a single byte to this builder.

func (*Builder) AppendBytes ¶

func (r *Builder) AppendBytes(b []byte)

AppendBytes Append the provided bytes to this builder.

func (*Builder) ByteAt ¶

func (r *Builder) ByteAt(offset int) byte

ByteAt Return the byte at the given offset.

func (*Builder) Bytes ¶

func (r *Builder) Bytes() []byte

Bytes Return a reference to the bytes of this builder.

func (*Builder) Clear ¶

func (r *Builder) Clear()

func (*Builder) CopyBytes ¶

func (r *Builder) CopyBytes(b []byte, off, length int)

CopyBytes Replace the content of this builder with the provided bytes. Equivalent to calling clear() and then append(byte[], int, int).

func (*Builder) CopyBytesBuilder ¶

func (r *Builder) CopyBytesBuilder(builder *Builder)

CopyBytesBuilder Replace the content of this builder with the provided bytes. Equivalent to calling clear() and then append(Builder).

func (*Builder) Get ¶

func (r *Builder) Get() []byte

func (*Builder) Grow ¶

func (r *Builder) Grow(capacity int)

Grow Ensure that this builder can hold at least capacity bytes without resizing.

func (*Builder) Length ¶

func (r *Builder) Length() int

Length Return the number of bytes in this buffer.

func (*Builder) SetByteAt ¶

func (r *Builder) SetByteAt(offset int, b byte)

SetByteAt Set a byte.

func (*Builder) SetLength ¶

func (r *Builder) SetLength(length int)

SetLength Set the length.

type BytesHash ¶

type BytesHash struct {
	sync.Mutex
	// contains filtered or unexported fields
}

BytesHash is a special purpose hash-map like data-structure optimized for BytesRef instances. BytesHash maintains mappings of byte arrays to ids (Map<BytesRef,int>) storing the hashed bytes efficiently in continuous storage. The mapping to the id is encapsulated inside BytesHash and is guaranteed to be increased for each added BytesRef.

BytesHash是一种专门为[]byte实例优化的类似哈希映射的数据结构。 BytesHash维护字节数组到id（map<[]byte, int>）的映射，有效地将散列字节存储在连续存储中。到id的映射封装在BytesHash中，并保证每添加一个BytesRef，id都会递增。

Note: The maximum capacity BytesRef instance passed to add(BytesRef) must not be longer than ByteBlockPool.BYTE_BLOCK_SIZE-2. The internal storage is limited to 2GB total byte storage.

func NewBytesHash ¶

func NewBytesHash(pool *BlockPool, options ...BytesHashOption) (*BytesHash, error)

func (*BytesHash) Add ¶

func (r *BytesHash) Add(bs []byte) (int, error)

Add adds a new []byte. bs: the bytes to hash. Returns: the id the given bytes are hashed to if there was no mapping for the given bytes, otherwise (-(id)-1). This guarantees that the return value will always be >= 0 if the given bytes haven't been hashed before.

func (*BytesHash) AddByPoolOffset ¶

func (r *BytesHash) AddByPoolOffset(offset uint32) int

AddByPoolOffset Adds an "arbitrary" int offset instead of a BytesRef term. This is used in the indexer to hold the hash for term vectors, because they do not redundantly store the byte[] term directly and instead reference the byte[] term already stored by the postings BytesHash. See add(int textStart) in TermsHashPerField.

func (*BytesHash) ByteStart ¶

func (r *BytesHash) ByteStart(bytesID int) uint32

ByteStart Returns the bytesStart offset into the internally used BlockPool for the given bytesID Params: bytesID – the id to look up Returns: the bytesStart offset into the internally used BlockPool for the given id

func (*BytesHash) Clear ¶

func (r *BytesHash) Clear(resetPool bool)

func (*BytesHash) Close ¶

func (r *BytesHash) Close()

func (*BytesHash) Compact ¶

func (r *BytesHash) Compact() []int

Compact Returns the ids array in arbitrary order. Valid ids start at offset of 0 and end at a limit of size() - 1. Note: This is a destructive operation. clear() must be called in order to reuse this BytesHash instance.

func (*BytesHash) Find ¶

func (r *BytesHash) Find(bytes []byte) int

func (*BytesHash) Get ¶

func (r *BytesHash) Get(id int) []byte

Get Returns the bytes for the given id. Note: the given id must be a non-negative integer less than the current size (Size()). id: the id to look up. Returns: the bytes stored for the given id.

func (*BytesHash) ReInit ¶

func (r *BytesHash) ReInit()

ReInit reinitializes the BytesHash after a previous clear() call. If clear() has not been called previously this method has no effect.

func (*BytesHash) Size ¶

func (r *BytesHash) Size() int

Size Returns the number of []byte/BytesRef values in this BytesHash. Returns: the number of BytesRef values in this BytesHash.

func (*BytesHash) Sort ¶

func (r *BytesHash) Sort() []int

Sort Returns the values array sorted by the referenced byte values. Note: This is a destructive operation. clear() must be called in order to reuse this BytesHash instance.

type BytesHashOption ¶

type BytesHashOption func(*bytesHashOption)

func WithCapacity ¶

func WithCapacity(capacity int) BytesHashOption

WithCapacity capacity must be a power of two, e.g. 4, 16, or 32.

func WithHash32 ¶

func WithHash32(hasher hash.Hash32) BytesHashOption

func WithStartArray ¶

func WithStartArray(startArray StartArray) BytesHashOption

type BytesIterator ¶

type BytesIterator interface {
	// Next Increments the iteration to the next BytesRef in the iterator. Returns the resulting BytesRef or
	// null if the end of the iterator is reached. The returned BytesRef may be re-used across calls to next.
	// After this method returns null, do not call it again: the results are undefined.
	// Returns: the next BytesRef in the iterator or null if the end of the iterator is reached.
	// Throws: 	IOException – If there is a low-level I/O error.
	Next(ctx context.Context) ([]byte, error)
}

type DirectStartArray ¶

type DirectStartArray struct {
	// contains filtered or unexported fields
}

DirectStartArray A simple BytesHash.BytesStartArray that tracks memory allocation using a private Counter instance.

func NewDirectStartArray ¶

func NewDirectStartArray(initSize int) *DirectStartArray

func (*DirectStartArray) Clear ¶

func (d *DirectStartArray) Clear() []uint32

func (*DirectStartArray) Grow ¶

func (d *DirectStartArray) Grow() []uint32

func (*DirectStartArray) Init ¶

func (d *DirectStartArray) Init() []uint32

type DirectTrackingAllocator ¶

type DirectTrackingAllocator struct {
}

type SortState ¶

type SortState struct {
	// contains filtered or unexported fields
}

type StartArray ¶

type StartArray interface {
	// Init Initializes the StartArray. This call will allocate memory
	// Returns: the initialized bytes start array
	Init() []uint32

	// Grow Grows the BytesHash.BytesStartArray
	// Returns: the grown array
	Grow() []uint32

	// Clear clears the BytesHash.BytesStartArray and returns the cleared instance.
	// Returns: the cleared instance, this might be null
	Clear() []uint32
}

StartArray Manages allocation of the per-term addresses.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL