Documentation
¶
Overview ¶
Package preprocess performs pre-parsing filtering and modification of a scientific name.
Index ¶
Constants ¶
This section is empty.
Variables ¶
// AmbiguousException maps a capitalized name (the keys look like genus names)
// to the lowercase words registered as exceptions for that name.
//
// NOTE(review): presumably each listed word would otherwise be read as
// something other than a specific epithet (an author prefix such as "van" or
// "dela", a rank marker such as "ser", or an annotation such as "complex" or
// "spec") -- confirm against the code that consults this table.
var AmbiguousException = map[string][]string{
	"Aeolesthes":        {"mihi"},
	"Agnetina":          {"den"},
	"Agra":              {"not"},
	"Aleuroclava":       {"complex"},
	"Allawrencius":      {"complex"},
	"Anisochaeta":       {"mihi"},
	"Antaplaga":         {"dela"},
	"Baeolidia":         {"dela"},
	"Bolbodeomyia":      {"complex"},
	"Bolitoglossa":      {"la"},
	"Campylosphaera":    {"dela"},
	"Castelnaudia":      {"spec"},
	"Cicada":            {"complex"},
	"Concinnum":         {"ten"},
	"Desmoxytes":        {"des"},
	"Dicentria":         {"dela"},
	"Dichostasia":       {"complex"},
	"Dimorphoceras":     {"complex"},
	"Dischidia":         {"complex"},
	"Ecnomus":           {"complex"},
	"Eresus":            {"da"},
	"Eucyclops":         {"mihi"},
	"Eulaira":           {"dela"},
	"Fusinus":           {"complex"},
	"Gnathopleustes":    {"den"},
	"Gobiosoma":         {"spec"},
	"Gonatobotrys":      {"complex"},
	"Heizmannia":        {"complex"},
	"Helophorus":        {"ser"},
	"Hemicloeina":       {"spec"},
	"Lampona":           {"spec"},
	"Leptonetela":       {"la"},
	"Libystica":         {"complex"},
	"Malamatidia":       {"zu"},
	"Meteorus":          {"dos"},
	"Nocaracris":        {"van"},
	"Notozomus":         {"spec"},
	"Ochodaeus":         {"complex"},
	"Odontella":         {"do"},
	"Oecetis":           {"complex"},
	"Oedipina":          {"complex"},
	"Oedipus":           {"complex"},
	"Oedopinola":        {"complex"},
	"Orcevia":           {"zu"},
	"Paradimorphoceras": {"complex"},
	"Paralvinella":      {"dela"},
	"Parentia":          {"do"},
	"Phyllospongia":     {"complex"},
	"Plagiozopelma":     {"du"},
	"Plectrocnemia":     {"complex"},
	"Rubus":             {"complex"},
	"Ruteloryctes":      {"bis"},
	"Sceliphron":        {"complex"},
	"Schizura":          {"dela"},
	"Scopaeus":          {"complex"},
	"Scoparia":          {"dela"},
	"Selenops":          {"ab"},
	"Semiothisa":        {"da"},
	"Serina":            {"ser", "subser"},
	"Sigipinius":        {"complex"},
	"Stegosoladidus":    {"complex"},
	"Stenoecia":         {"dos"},
	"Sympycnus":         {"du"},
	"Tetracis":          {"complex"},
	"Tetramorium":       {"do"},
	"Tortolena":         {"dela"},
	"Trichosternus":     {"spec"},
	"Trisephena":        {"complex"},
	"Zodarion":          {"van"},
}
// NoParseException maps a capitalized name (the keys look like genus names)
// to a single lowercase word registered as an exception for that name.
//
// NOTE(review): presumably the word would otherwise cause the input to be
// flagged as not parseable (compare the NoParse field of Preprocessor) --
// confirm against the code that consults this table.
var NoParseException = map[string]string{
	"Navicula":   "bacterium",
	"Spirophora": "bacterium",
}
// VirusException maps a capitalized name (the keys look like genus names) to
// a single lowercase word registered as an exception for that name.
//
// NOTE(review): presumably the word would otherwise cause the input to be
// flagged as a virus-like name (compare the Virus field of Preprocessor) --
// confirm against the code that consults this table.
var VirusException = map[string]string{
	"Aspilota":       "vector",
	"Bembidion":      "satellites",
	"Bolivina":       "prion",
	"Ceylonesmus":    "vector",
	"Cryptops":       "vector",
	"Culex":          "vector",
	"Dasyproctus":    "cevirus",
	"Desmoxytes":     "vector",
	"Dicathais":      "vector",
	"Erateina":       "satellites",
	"Euragallia":     "prion",
	"Exochus":        "virus",
	"Hilara":         "vector",
	"Ithomeis":       "satellites",
	"Microgoneplax":  "prion",
	"Neoaemula":      "vector",
	"Nephodia":       "satellites",
	"Ophion":         "virus",
	"Phalium":        "vector",
	"Psenulus":       "trevirus",
	"Tidabius":       "vector",
	"Turkozelotes":   "attavirus",
}
Functions ¶
func CleanupStream ¶
func CleanupStream(in <-chan string, out chan<- *CleanupResult, wn int)
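CleanupStream reads names from the in channel and sends one *CleanupResult per name to the out channel, with the original name in Input and the cleaned-up name in Output. (An earlier description spoke of pipe-delimited strings, with the original name to the left of the pipe and the cleaned-up name to the right; the signature shows structured results instead.) The wn parameter is presumably the number of workers; confirm against the implementation.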
func StripTags ¶
StripTags takes a slice of bytes and returns a string with common tags removed and HTML entities escaped. It keeps all uncommon tags intact so that the parser can deal with them.
func UnderscoreToSpace ¶
UnderscoreToSpace takes a slice of bytes. If the input contains underscores but no spaces, it replaces the underscores with spaces in the slice. If any spaces are present, the slice is returned unmodified.
Types ¶
type CleanupResult ¶
// CleanupResult keeps results of removal of some HTML tags.
type CleanupResult struct {
	// Input is the original name.
	Input string
	// Output is the name after the tag removal.
	Output string
}
CleanupResult keeps results of removal of some HTML tags.
type Preprocessor ¶
type Preprocessor struct { Virus bool Underscore bool NoParse bool DaggerChar bool Approximate bool Annotation bool Body []byte Tail []byte Ambiguous ambiguous }
Preprocessor structure keeps state of the preprocessor results.
func Preprocess ¶
func Preprocess(ppr *preparser.PreParser, bs []byte) *Preprocessor
Preprocess runs a series of regular expressions over the input to determine features of the input before parsing.