tokenattributes

package
v0.0.0-...-d0be9ee Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 10, 2015 License: Apache-2.0 Imports: 2 Imported by: 26

Documentation

Index

Constants

View Source
const DEFAULT_TYPE = "word"
View Source
const MIN_BUFFER_SIZE = 10

Variables

View Source
var DEFAULT_ATTRIBUTE_FACTORY = new(DefaultAttributeFactory)

This is the default factory that creates AttributeImpls using the class name of the supplied Attribute interface class by appending Impl to it.

Functions

func NewPackedTokenAttribute

func NewPackedTokenAttribute() util.AttributeImpl

Types

type CharTermAttribute

type CharTermAttribute interface {
	// Copies the contents of buffer into the termBuffer array
	CopyBuffer(buffer []rune)
	// Returns the internal termBuffer rune slice which you can then
	// directly alter. If the slice is too small for your token, use
	// ResizeBuffer(int) to increase it. After altering the buffer, be
	// sure to call SetLength() to record the number of valid runes
	// that were placed into the termBuffer.
	//
	// NOTE: the returned buffer may be larger than the valid Length().
	Buffer() []rune
	Length() int
	// Appends the specified string to this character sequence.
	//
	// The characters of the string argument are appended, in order,
	// increasing the length of this sequence by the length of the
	// argument. If the argument is "", then the three characters "nil" are
	// appended.
	AppendString(string) CharTermAttribute
}

The term text of a Token.

type CharTermAttributeImpl

type CharTermAttributeImpl struct {
	// contains filtered or unexported fields
}

Default implementation of CharTermAttribute.

func (*CharTermAttributeImpl) AppendString

func (a *CharTermAttributeImpl) AppendString(s string) CharTermAttribute

func (*CharTermAttributeImpl) Buffer

func (a *CharTermAttributeImpl) Buffer() []rune

func (*CharTermAttributeImpl) BytesRef

func (a *CharTermAttributeImpl) BytesRef() *util.BytesRef

func (*CharTermAttributeImpl) Clear

func (a *CharTermAttributeImpl) Clear()

func (*CharTermAttributeImpl) Clone

func (*CharTermAttributeImpl) CopyBuffer

func (a *CharTermAttributeImpl) CopyBuffer(buffer []rune)

func (*CharTermAttributeImpl) CopyTo

func (a *CharTermAttributeImpl) CopyTo(target util.AttributeImpl)

func (*CharTermAttributeImpl) FillBytesRef

func (a *CharTermAttributeImpl) FillBytesRef()

func (*CharTermAttributeImpl) Interfaces

func (a *CharTermAttributeImpl) Interfaces() []string

func (*CharTermAttributeImpl) Length

func (a *CharTermAttributeImpl) Length() int

func (*CharTermAttributeImpl) String

func (a *CharTermAttributeImpl) String() string

type DefaultAttributeFactory

type DefaultAttributeFactory struct{}

func (*DefaultAttributeFactory) Create

type OffsetAttribute

type OffsetAttribute interface {
	util.Attribute
	// Returns this Token's starting offset, the position of the first
	// character corresponding to this token in the source text.
	StartOffset() int
	// Note that the difference between EndOffset() and StartOffset()
	// may not be equal to termText.Length(), as the term text may
	// have been altered by a stemmer or some other filter.
	// Set the starting and ending offset.
	SetOffset(int, int)
	// Returns this Token's ending offset, one greater than the
	// position of the last character corresponding to this token in
	// the source text. The length of the token in the source text is
	// (endOffset() - startOffset()).
	EndOffset() int
}

The start and end character offset of a Token.

type OffsetAttributeImpl

type OffsetAttributeImpl struct {
	// contains filtered or unexported fields
}

Default implementation of OffsetAttribute

func (*OffsetAttributeImpl) Clear

func (a *OffsetAttributeImpl) Clear()

func (*OffsetAttributeImpl) Clone

func (*OffsetAttributeImpl) CopyTo

func (a *OffsetAttributeImpl) CopyTo(target util.AttributeImpl)

func (*OffsetAttributeImpl) EndOffset

func (a *OffsetAttributeImpl) EndOffset() int

func (*OffsetAttributeImpl) Interfaces

func (a *OffsetAttributeImpl) Interfaces() []string

func (*OffsetAttributeImpl) SetOffset

func (a *OffsetAttributeImpl) SetOffset(startOffset, endOffset int)

func (*OffsetAttributeImpl) StartOffset

func (a *OffsetAttributeImpl) StartOffset() int

type PackedTokenAttributeImpl

type PackedTokenAttributeImpl struct {
	*CharTermAttributeImpl
	// contains filtered or unexported fields
}

Default implementation of the common attributes used by Lucene: - CharTermAttribute - TypeAttribute - PositionIncrementAttribute - PositionLengthAttribute - OffsetAttribute

func (*PackedTokenAttributeImpl) Clear

func (a *PackedTokenAttributeImpl) Clear()

func (*PackedTokenAttributeImpl) Clone

func (*PackedTokenAttributeImpl) CopyTo

func (a *PackedTokenAttributeImpl) CopyTo(target util.AttributeImpl)

func (*PackedTokenAttributeImpl) EndOffset

func (a *PackedTokenAttributeImpl) EndOffset() int

func (*PackedTokenAttributeImpl) Interfaces

func (a *PackedTokenAttributeImpl) Interfaces() []string

func (*PackedTokenAttributeImpl) PositionIncrement

func (a *PackedTokenAttributeImpl) PositionIncrement() int

func (*PackedTokenAttributeImpl) SetOffset

func (a *PackedTokenAttributeImpl) SetOffset(startOffset, endOffset int)

func (*PackedTokenAttributeImpl) SetPositionIncrement

func (a *PackedTokenAttributeImpl) SetPositionIncrement(positionIncrement int)

func (*PackedTokenAttributeImpl) SetType

func (a *PackedTokenAttributeImpl) SetType(typ string)

func (*PackedTokenAttributeImpl) StartOffset

func (a *PackedTokenAttributeImpl) StartOffset() int

type PayloadAttribute

type PayloadAttribute interface {
	util.Attribute
	// Returns this Token's payload
	Payload() []byte
	// Sets this Token's payload.
	SetPayload([]byte)
}

The payload of a Token.

The payload is stored in the index at each position, and can be used to influence scoring when using Payload-based queries in the payloads and spans packages.

NOTE: because the payload will be stored at each position, it's usually best to use the minimum number of bytes necessary. Some codec implementations may optimize payload storage when all payloads have the same length.

type PayloadAttributeImpl

type PayloadAttributeImpl struct {
	// contains filtered or unexported fields
}

Default implementation of PayloadAttribute

func (*PayloadAttributeImpl) Clear

func (a *PayloadAttributeImpl) Clear()

func (*PayloadAttributeImpl) Clone

func (*PayloadAttributeImpl) CopyTo

func (a *PayloadAttributeImpl) CopyTo(target util.AttributeImpl)

func (*PayloadAttributeImpl) Interfaces

func (a *PayloadAttributeImpl) Interfaces() []string

func (*PayloadAttributeImpl) Payload

func (a *PayloadAttributeImpl) Payload() []byte

func (*PayloadAttributeImpl) SetPayload

func (a *PayloadAttributeImpl) SetPayload(payload []byte)

type PositionIncrementAttribute

type PositionIncrementAttribute interface {
	util.Attribute
	// Set the position increment. The default value is one.
	SetPositionIncrement(int)
	// Returns the position increment of this token.
	PositionIncrement() int
}

Determines the position of this token relative to the previous Token in a TokenStream, used in phrase searching.

The default value is one.

Some common uses for this are:

  • Set it to zero to put multiple terms in the same position. This is useful if, e.g., a word has multiple stems. Searches for phrases including either stem will match. In this case, all but the first stem's increment should be set to zero: the increment of the first instance should be one. Repeating a token with an increment of zero can also be used to boost the scores of matches on that token.

  • Set it to values greater than one to inhibit exact phrase matches. If, for example, one does not want phrases to match across removed stop words, then one could build a stop word filter that removes stop words and also sets the increment to the number of stop words removed before each non-stop word. Then exact phrase queries will only match when the terms occur with no intervening stop words.

type PositionIncrementAttributeImpl

type PositionIncrementAttributeImpl struct {
	// contains filtered or unexported fields
}

Default implementation of PositionIncrementAttribute

func (*PositionIncrementAttributeImpl) Clear

func (a *PositionIncrementAttributeImpl) Clear()

func (*PositionIncrementAttributeImpl) Clone

func (*PositionIncrementAttributeImpl) CopyTo

func (*PositionIncrementAttributeImpl) Interfaces

func (a *PositionIncrementAttributeImpl) Interfaces() []string

func (*PositionIncrementAttributeImpl) PositionIncrement

func (a *PositionIncrementAttributeImpl) PositionIncrement() int

func (*PositionIncrementAttributeImpl) SetPositionIncrement

func (a *PositionIncrementAttributeImpl) SetPositionIncrement(positionIncrement int)

type PositionLengthAttribute

type PositionLengthAttribute interface {
	util.Attribute
	SetPositionLength(int)
}

type TermToBytesRefAttribute

type TermToBytesRefAttribute interface {
	// Updates the bytes BytesRef() to contain this term's final
	// encoding.
	FillBytesRef()
	// Retrieve this attribute's BytesRef. The bytes are updated from
	// the current term when the consumer calls FillBytesRef().
	BytesRef() *util.BytesRef
}

This attribute is requested by TermsHashPerField to index the contents. This attribute can be used to customize the final []byte encoding of terms.

Consumers of this attribute call BytesRef() up-front, and then invoke FillBytesRef() for each term. Example:

termAtt := tokenStream.Get("TermToBytesRefAttribute")
bytes := termAtt.BytesRef();

var err error
var ok bool
for ok, err = tokenStream.IncrementToken(); ok && err == nil; ok, err = tokenStream.IncrementToken() {

	// you must call termAtt.FillBytesRef() before doing something with the bytes.
	// this encodes the term value (internally it might be a []rune, etc) into the bytes.
	termAtt.FillBytesRef()

	if isInteresting(bytes) {

		// because the bytes are reused by the attribute (like CharTermAttribute's []rune buffer),
		// you should make a copy if you need persistent access to the bytes, otherwise they will
		// be rewritten across calls to IncrementToken()

		clone := make([]byte, len(bytes))
		copy(clone, bytes)
		doSomethingWith(clone)
	}
}
...

type TypeAttribute

type TypeAttribute interface {
	util.Attribute
	// Set the lexical type.
	SetType(string)
}

A Token's lexical type. The default value is "word".

type TypeAttributeImpl

type TypeAttributeImpl struct {
	// contains filtered or unexported fields
}

Default implementation of TypeAttribute

func (*TypeAttributeImpl) Clear

func (a *TypeAttributeImpl) Clear()

func (*TypeAttributeImpl) Clone

func (*TypeAttributeImpl) CopyTo

func (a *TypeAttributeImpl) CopyTo(target util.AttributeImpl)

func (*TypeAttributeImpl) Interfaces

func (a *TypeAttributeImpl) Interfaces() []string

func (*TypeAttributeImpl) SetType

func (a *TypeAttributeImpl) SetType(typ string)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL