ar

package
v0.0.0-...-27f001b Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 20, 2014 License: Apache-2.0 Imports: 4 Imported by: 0

Documentation

Overview

Copyright (c) 2014 Couchbase, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Copyright (c) 2014 Couchbase, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Index

Constants

View Source
// Unicode code points for Arabic letters and diacritical marks, expressed as
// untyped rune constants. Names follow the Unicode character names.
const (
	// Alef and its madda/hamza-carrying variants.
	ALEF             = '\u0627'
	ALEF_MADDA       = '\u0622'
	ALEF_HAMZA_ABOVE = '\u0623'
	ALEF_HAMZA_BELOW = '\u0625'

	// Yeh, dotless yeh (U+0649, ALEF MAKSURA), teh marbuta and heh.
	YEH         = '\u064A'
	DOTLESS_YEH = '\u0649'
	TEH_MARBUTA = '\u0629'
	HEH         = '\u0647'

	// Tatweel, the elongation character.
	TATWEEL = '\u0640'

	// Tanween (doubled-vowel) marks.
	FATHATAN = '\u064B'
	DAMMATAN = '\u064C'
	KASRATAN = '\u064D'

	// Short-vowel marks, followed by shadda and sukun.
	FATHA  = '\u064E'
	DAMMA  = '\u064F'
	KASRA  = '\u0650'
	SHADDA = '\u0651'
	SUKUN  = '\u0652'
)
View Source
// NormalizeName is the identifier string "normalize_ar".
// NOTE(review): presumably the name under which the Arabic normalization
// token filter is registered — confirm against the registration code.
const NormalizeName = "normalize_ar"
View Source
// StopName is the identifier string "stop_ar".
// NOTE(review): presumably the name shared by the Arabic stop-word token map
// and/or stop token filter — confirm against the registration code.
const StopName = "stop_ar"

Variables

View Source
// ArabicStopWords holds the raw Arabic stop-word list as bytes: a header of
// '#'-prefixed attribution/licensing lines followed by one word per line.
// Per its own header, the list is NOT normalized, so it is meant to be applied
// before normalization and deliberately contains redundant spelling variants
// (for example forms with both أ and ا).
// NOTE(review): '#' lines are presumably skipped as comments by the loader —
// confirm against the token-map loading code.
var ArabicStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
# Cleaned on October 11, 2009 (not normalized, so use before normalization)
# This means that when modifying this list, you might need to add some 
# redundant entries, for example containing forms with both أ and ا
من
ومن
منها
منه
في
وفي
فيها
فيه
و
ف
ثم
او
أو
ب
بها
به
ا
أ
اى
اي
أي
أى
لا
ولا
الا
ألا
إلا
لكن
ما
وما
كما
فما
عن
مع
اذا
إذا
ان
أن
إن
انها
أنها
إنها
انه
أنه
إنه
بان
بأن
فان
فأن
وان
وأن
وإن
التى
التي
الذى
الذي
الذين
الى
الي
إلى
إلي
على
عليها
عليه
اما
أما
إما
ايضا
أيضا
كل
وكل
لم
ولم
لن
ولن
هى
هي
هو
وهى
وهي
وهو
فهى
فهي
فهو
انت
أنت
لك
لها
له
هذه
هذا
تلك
ذلك
هناك
كانت
كان
يكون
تكون
وكانت
وكان
غير
بعض
قد
نحو
بين
بينما
منذ
ضمن
حيث
الان
الآن
خلال
بعد
قبل
حتى
عند
عندما
لدى
جميع
`)

Functions

func NormalizerFilterConstructor

func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error)

func StopTokenFilterConstructor

func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error)

func TokenMapConstructor

func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error)

Types

type ArabicNormalizeFilter

// ArabicNormalizeFilter is a field-less type; it carries no state of its own
// and exists to hang the Filter method on.
// NOTE(review): presumably the analysis.TokenFilter returned by
// NormalizerFilterConstructor — confirm against that constructor.
type ArabicNormalizeFilter struct{}

func NewArabicNormalizeFilter

func NewArabicNormalizeFilter() *ArabicNormalizeFilter

func (*ArabicNormalizeFilter) Filter

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL