Documentation ¶
Index ¶
- Constants
- Variables
- func FromBuckwalter(sen string) string
- func IsArabicLetter(letter rune) bool
- func IsContentClean(content string) bool
- func IsPunctuation(letter rune) bool
- func IsShadda(letter rune) bool
- func IsShortVowel(letter rune) bool
- func IsTanween(letter rune) bool
- func IsValid(pointedWord string) bool
- func IsWhitespace(letter rune) bool
- func PunctuationRegex() *regexp.Regexp
- func RemoveExtraWhitespace(content string) string
- type Excerpt
- type ExcerptIterator
- type LetterPack
- type Sentence
- type Word
Constants ¶
// Unicode code points for the Arabic diacritics, punctuation marks, and
// letters referenced by this package.
const (
	// Shadda (gemination mark, U+0651).
	Shadda = rune(0x0651)

	// Short vowels, sukoon, and the tanween (doubled) vowel marks.
	Sukoon   = rune(0x0652)
	Damma    = rune(0x064F)
	Fatha    = rune(0x064E)
	Kasra    = rune(0x0650)
	Dammatan = rune(0x064C)
	Fathatan = rune(0x064B)
	Kasratan = rune(0x064D)

	// Misc. Placeholder is U+25CC (dotted circle).
	Placeholder     = rune(0x25CC)
	SuperscriptAlef = rune(0x670)

	// Punctuation. Period, Colon, and QuotationMark are explicitly typed
	// rune constants; EmDash is an untyped rune constant.
	ArabicQuestionMark           = rune(0x61F)
	LeftAngleQuotationMark       = rune(0x00AB)
	RightAngleQuotationMark      = rune(0x00BB)
	Period                  rune = '.'
	Colon                   rune = ':'
	QuotationMark           rune = '"'
	ArabicComma                  = rune(0x060C)
	EmDash                       = '—'

	// Letters, listed in code-point order apart from the trailing
	// AlefWaslah (U+0671) and Tatweel (U+0640).
	Hamza              = rune(0x0621)
	AlefWithMadda      = rune(0x0622)
	AlefWithHamzaAbove = rune(0x0623)
	WawWithHamza       = rune(0x0624)
	AlefWithHamzaBelow = rune(0x0625)
	YehWithHamzaAbove  = rune(0x0626)
	Alef               = rune(0x0627)
	Beh                = rune(0x0628)
	TehMarbuta         = rune(0x0629)
	Teh                = rune(0x062A)
	Theh               = rune(0x062B)
	Jeem               = rune(0x062C)
	Hah                = rune(0x062D)
	Khah               = rune(0x062E)
	Dal                = rune(0x062F)
	Thal               = rune(0x0630)
	Reh                = rune(0x0631)
	Zain               = rune(0x0632)
	Seen               = rune(0x0633)
	Sheen              = rune(0x0634)
	Sad                = rune(0x0635)
	Dad                = rune(0x0636)
	Tah                = rune(0x0637)
	Zah                = rune(0x0638)
	Ain                = rune(0x0639)
	Ghain              = rune(0x063A)
	Feh                = rune(0x0641)
	Qaf                = rune(0x0642)
	Kaf                = rune(0x0643)
	Lam                = rune(0x0644)
	Meem               = rune(0x0645)
	Noon               = rune(0x0646)
	Heh                = rune(0x0647)
	Waw                = rune(0x0648)
	AlefMaksura        = rune(0x0649)
	Yeh                = rune(0x064A)
	AlefWaslah         = rune(0x0671)
	Tatweel            = rune(0x0640)
)
Variables ¶
// GrammaticalTags lists the Arabic grammatical tag labels defined by this
// package. Entries carry no leading or trailing whitespace so that they
// compare equal to trimmed tag strings.
//
// NOTE(review): the entry "خبر كان واخواتها" spells "واخواتها" without the
// hamza, unlike the sibling entries that use "وأخواتها"; the spelling is left
// unchanged here — confirm which form stored data relies on before
// normalizing.
var GrammaticalTags = []string{
	"اسم مرفوع",
	"اسم منصوب",
	"اسم مجرور",
	"فعل مرفوع",
	"فعل منصوب",
	"فعل مجزوم",
	"مبني",
	"توابع",
	"مضارع مرفوع",
	"مضارع منصوب بحرف النصب",
	"مضارع مجزوم بحرف الجزم",
	"مضارع مجزوم بأداة الشرط الجازم",
	"مبتدأ",
	"خبر",
	"فاعل نائب",
	"فاعل",
	"اسم كان وأخواتها",
	"خبر إن وأخواتها",
	"مفعول به",
	"مفعول به ثان",
	"مفعول به ثالث",
	"مفعول فيه",
	"مفعول مطلق",
	"مفعول لأجله",
	"مفعول معه",
	"حال",
	"تمييز",
	"مستثنى",
	"حصر",
	"منادى",
	"اسم وخبر ظن وأخواتها",
	"اسم وخبر حرف نفي",
	"اسم إن وأخواتها",
	"خبر كان واخواتها",
	"مضاف إليه",
	"اسم بعد حرف جر",
	"نعت",
	"اسم معطوف",
	"توكيد",
	"بدل",
}
Functions ¶
func FromBuckwalter ¶
func IsArabicLetter ¶
IsArabicLetter checks if a letter is part of the classical Arabic script. It returns false for tashkeel (diacritical marks).
func IsContentClean ¶
IsContentClean ensures that all characters conform to Kalam's character set
func IsPunctuation ¶
IsPunctuation checks if a letter is part of the accepted punctuation.
func IsShortVowel ¶
IsShortVowel checks if the character is a fatha, kasra, damma, or sukoon, with their tanween variations. It returns false for shadda and long vowels like the alef.
func IsValid ¶ added in v0.7.4
IsValid checks if every Arabic letter in pointedWord has a vowel, and that each letter has only one vowel and at most one optional shadda. IsValid makes a call to IsContentClean.
func IsWhitespace ¶
IsWhitespace is preferred over unicode.IsSpace since we have our own whitespace rules
func PunctuationRegex ¶ added in v0.2.0
func RemoveExtraWhitespace ¶
RemoveExtraWhitespace removes unnecessary whitespace, ensuring that there are no double spaces and no beginning/ending whitespace.
Types ¶
type Excerpt ¶
func (Excerpt) Iterator ¶ added in v0.4.0
func (e Excerpt) Iterator() (ExcerptIterator, bool)
Iterator returns an ExcerptIterator which points to the first quizzable word
type ExcerptIterator ¶ added in v0.4.0
func (ExcerptIterator) Next ¶ added in v0.4.0
func (i ExcerptIterator) Next() (ExcerptIterator, bool)
Next returns an iterator pointing to the next quizzable word. If there are no more words, the returned bool is true.
func (ExcerptIterator) Sentence ¶ added in v0.4.0
func (i ExcerptIterator) Sentence() Sentence
func (ExcerptIterator) Word ¶ added in v0.4.0
func (i ExcerptIterator) Word() Word
type LetterPack ¶
func LetterPackFromString ¶ added in v0.7.0
func LetterPackFromString(str string) LetterPack
func LetterPacks ¶ added in v0.7.4
func LetterPacks(pointedWord string) []LetterPack
LetterPacks breaks down each letter from pointedWord into a LetterPack struct. LetterPacks assumes pointedWord is valid.
func (LetterPack) EqualTo ¶ added in v0.7.0
func (l LetterPack) EqualTo(o LetterPack) bool
func (LetterPack) String ¶
func (l LetterPack) String() string
func (LetterPack) Unpointed ¶ added in v0.5.0
func (l LetterPack) Unpointed(showShadda bool) string
type Word ¶
// Word describes a single token together with its annotations.
type Word struct {
	// PointedWord is the text of the word.
	// NOTE(review): presumably the fully vowelled (pointed) form — confirm.
	PointedWord string
	// Tags holds the labels attached to the word.
	// NOTE(review): presumably values from GrammaticalTags — confirm.
	Tags []string
	// Punctuation flags the token as punctuation.
	// NOTE(review): inferred from the field name — confirm.
	Punctuation bool
	// Ignore flags the token as one to be skipped.
	// NOTE(review): inferred from the field name; what skips it is not
	// visible here — confirm.
	Ignore bool
	// Preceding is true if it precedes another word or punctuation without
	// any space.
	Preceding bool
}
func (Word) Termination ¶ added in v0.3.0
func (w Word) Termination() LetterPack
Termination returns the last letter of w