Documentation ¶
Index ¶
- Constants
- Variables
- func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error)
- func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error)
- func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error)
- func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error)
- type SoraniNormalizeFilter
- type SoraniStemmerFilter
Constants ¶
View Source
// Unicode code points referenced by the Sorani Kurdish (ckb) analysis code.
// Every constant is an untyped rune constant; the trailing comment gives the
// character's name in the Unicode standard.
const (
	// Letters.
	Yeh            = '\u064A' // ARABIC LETTER YEH
	DotlessYeh     = '\u0649' // ARABIC LETTER ALEF MAKSURA
	FarsiYeh       = '\u06CC' // ARABIC LETTER FARSI YEH
	Kaf            = '\u0643' // ARABIC LETTER KAF
	Keheh          = '\u06A9' // ARABIC LETTER KEHEH
	Heh            = '\u0647' // ARABIC LETTER HEH
	Ae             = '\u06D5' // ARABIC LETTER AE
	Zwnj           = '\u200C' // ZERO WIDTH NON-JOINER
	HehDoachashmee = '\u06BE' // ARABIC LETTER HEH DOACHASHMEE
	TehMarbuta     = '\u0629' // ARABIC LETTER TEH MARBUTA
	Reh            = '\u0631' // ARABIC LETTER REH
	Rreh           = '\u0695' // ARABIC LETTER REH WITH SMALL V BELOW
	RrehAbove      = '\u0692' // ARABIC LETTER REH WITH SMALL V

	// Elongation mark and diacritics.
	Tatweel  = '\u0640' // ARABIC TATWEEL
	Fathatan = '\u064B' // ARABIC FATHATAN
	Dammatan = '\u064C' // ARABIC DAMMATAN
	Kasratan = '\u064D' // ARABIC KASRATAN
	Fatha    = '\u064E' // ARABIC FATHA
	Damma    = '\u064F' // ARABIC DAMMA
	Kasra    = '\u0650' // ARABIC KASRA
	Shadda   = '\u0651' // ARABIC SHADDA
	Sukun    = '\u0652' // ARABIC SUKUN
)
View Source
// NormalizeName is the string identifier "normalize_ckb".
// NOTE(review): presumably the name under which the Sorani normalization
// token filter is registered; the registration call is not visible here.
const NormalizeName = "normalize_ckb"
View Source
// StemmerName is the string identifier "stemmer_ckb".
// NOTE(review): presumably the name under which the Sorani stemmer token
// filter is registered; the registration call is not visible here.
const StemmerName = "stemmer_ckb"
View Source
// StopName is the string identifier "stop_ckb".
// NOTE(review): presumably the name shared by the Sorani stop-word token map
// and stop-token filter; the registration calls are not visible here.
const StopName = "stop_ckb"
Variables ¶
View Source
// SoraniStopWords is the Sorani Kurdish stop word list stored as raw bytes.
// The text holds one entry per line; lines starting with '#' carry English
// glosses, section headings and provenance notes (presumably skipped as
// comments by whatever loads the list; confirm against the loader).
// The words are stored in normalized form, as stated in the list's own header.
// The literal is runtime data: do not reformat or "tidy" its contents.
var SoraniStopWords = []byte(`# set of kurdish stopwords
# note these have been normalized with our scheme (e represented with U+06D5, etc)
# constructed from:
# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al)
# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston)
# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc
# and
و
# which
کە
# of
ی
# made/did
کرد
# that/which
ئەوەی
# on/head
سەر
# two
دوو
# also
هەروەها
# from/that
لەو
# makes/does
دەکات
# some
چەند
# every
هەر
# demonstratives
# that
ئەو
# this
ئەم
# personal pronouns
# I
من
# we
ئێمە
# you
تۆ
# you
ئێوە
# he/she/it
ئەو
# they
ئەوان
# prepositions
# to/with/by
بە
پێ
# without
بەبێ
# along with/while/during
بەدەم
# in the opinion of
بەلای
# according to
بەپێی
# before
بەرلە
# in the direction of
بەرەوی
# in front of/toward
بەرەوە
# before/in the face of
بەردەم
# without
بێ
# except for
بێجگە
# for
بۆ
# on/in
دە
تێ
# with
دەگەڵ
# after
دوای
# except for/aside from
جگە
# in/from
لە
لێ
# in front of/before/because of
لەبەر
# between/among
لەبەینی
# concerning/about
لەبابەت
# concerning
لەبارەی
# instead of
لەباتی
# beside
لەبن
# instead of
لەبرێتی
# behind
لەدەم
# with/together with
لەگەڵ
# by
لەلایەن
# within
لەناو
# between/among
لەنێو
# for the sake of
لەپێناوی
# with respect to
لەرەوی
# by means of/for
لەرێ
# for the sake of
لەرێگا
# on/on top of/according to
لەسەر
# under
لەژێر
# between/among
ناو
# between/among
نێوان
# after
پاش
# before
پێش
# like
وەک
`)
Functions ¶
Types ¶
type SoraniNormalizeFilter ¶
// SoraniNormalizeFilter is the Sorani normalization token filter type. Its
// Filter method accepts an analysis.TokenStream and returns one. No fields
// are shown in this listing.
type SoraniNormalizeFilter struct { }
func NewSoraniNormalizeFilter ¶
func NewSoraniNormalizeFilter() *SoraniNormalizeFilter
func (*SoraniNormalizeFilter) Filter ¶
func (s *SoraniNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream
type SoraniStemmerFilter ¶
// SoraniStemmerFilter is the Sorani stemming token filter type. Its Filter
// method accepts an analysis.TokenStream and returns one. No fields are
// shown in this listing.
type SoraniStemmerFilter struct { }
func NewSoraniStemmerFilter ¶
func NewSoraniStemmerFilter() *SoraniStemmerFilter
func (*SoraniStemmerFilter) Filter ¶
func (s *SoraniStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream
Click to show internal directories.
Click to hide internal directories.