tokenizer

package
v0.0.1-alpha4 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 3, 2020 License: Apache-2.0 Imports: 5 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var (
	SpanishStopWords = []string{} /* 443 elements not displayed */

)
View Source
var (
	SpanishTokenizer = NewStopWordsTokenizer(SpanishStopWords)
)

Functions

This section is empty.

Types

type IsMarkNonSpacingChecker

type IsMarkNonSpacingChecker struct{}

func (*IsMarkNonSpacingChecker) Contains

func (c *IsMarkNonSpacingChecker) Contains(r rune) bool

type SimpleTokenizer

type SimpleTokenizer struct {
	Tf Transformer
}

func NewSimpleTokenizer

func NewSimpleTokenizer() *SimpleTokenizer

func (*SimpleTokenizer) Tokenize

func (s *SimpleTokenizer) Tokenize(payload []byte) [][]byte

func (*SimpleTokenizer) TokenizeSingle

func (s *SimpleTokenizer) TokenizeSingle(payload []byte) ([]byte, bool)

type StopWordsTokenizer

type StopWordsTokenizer struct {
	T *SimpleTokenizer
	// contains filtered or unexported fields
}

func NewStopWordsTokenizer

func NewStopWordsTokenizer(stopWords []string) *StopWordsTokenizer

func (*StopWordsTokenizer) Tokenize

func (st *StopWordsTokenizer) Tokenize(payload []byte) [][]byte

func (*StopWordsTokenizer) TokenizeSingle

func (st *StopWordsTokenizer) TokenizeSingle(payload []byte) ([]byte, bool)

type Tokenizer

type Tokenizer interface {
	Tokenize([]byte) [][]byte
	TokenizeSingle([]byte) ([]byte, bool)
}

type Transformer

type Transformer interface {
	Transform(payload []byte) ([]byte, error)
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL