Documentation ¶
Index ¶
- type Jmdict
- type JmdictEntry
- type JmdictExample
- type JmdictExampleSentence
- type JmdictExampleSource
- type JmdictGlossary
- type JmdictKanji
- type JmdictReading
- type JmdictSense
- type JmdictSource
- type Jmnedict
- type JmnedictEntry
- type JmnedictKanji
- type JmnedictReading
- type JmnedictTranslation
- type Kanjidic
- type KanjidicCharacter
- type KanjidicCodepoint
- type KanjidicDicNumber
- type KanjidicHeader
- type KanjidicMeaning
- type KanjidicMisc
- type KanjidicQueryCode
- type KanjidicRadical
- type KanjidicReading
- type KanjidicReadingMeaning
- type KanjidicVariant
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Jmdict ¶
type Jmdict struct { // Entries consist of kanji elements, reading elements, // general information and sense elements. Each entry must have at // least one reading element and one sense element. Others are optional. Entries []JmdictEntry `xml:"entry"` }
type JmdictEntry ¶
type JmdictEntry struct { // A unique numeric sequence number for each entry Sequence int `xml:"ent_seq"` // The kanji element, or in its absence, the reading element, is // the defining component of each entry. // The overwhelming majority of entries will have a single kanji // element associated with a word in Japanese. Where there are // multiple kanji elements within an entry, they will be orthographical // variants of the same word, either using variations in okurigana, or // alternative and equivalent kanji. Common "mis-spellings" may be // included, provided they are associated with appropriate information // fields. Synonyms are not included; they may be indicated in the // cross-reference field associated with the sense element. Kanji []JmdictKanji `xml:"k_ele"` // The reading element typically contains the valid readings // of the word(s) in the kanji element using modern kanadzukai. // Where there are multiple reading elements, they will typically be // alternative readings of the kanji element. In the absence of a // kanji element, i.e. in the case of a word or phrase written // entirely in kana, these elements will define the entry. Readings []JmdictReading `xml:"r_ele"` // The sense element will record the translational equivalent // of the Japanese word, plus other related information. Where there // are several distinctly different meanings of the word, multiple // sense elements will be employed. Sense []JmdictSense `xml:"sense"` }
type JmdictExample ¶
type JmdictExample struct { // Each example has a Srce element that indicates the source of the example // the source is typically the Tatoeba Project Srce JmdictExampleSource `xml:"ex_srce"` // The term associated with this example Text string `xml:"ex_text"` // Contains the Example sentences Sentences []JmdictExampleSentence `xml:"ex_sent"` }
type JmdictExampleSentence ¶
type JmdictExampleSource ¶
type JmdictGlossary ¶
// JmdictGlossary is a single gloss: a target-language word or phrase
// together with the attributes carried by the gloss element.
type JmdictGlossary struct {
	// Content is the character data of the gloss element.
	Content string `xml:",chardata"`

	// Language is the xml:lang attribute, which defines the target language
	// of the gloss. It is coded using the three-letter language code from
	// the ISO 639 standard. When absent (nil), the value "eng" (i.e.
	// English) is the default.
	Language *string `xml:"lang,attr"`

	// Gender is the g_gend attribute, which defines the gender of the gloss
	// (typically a noun in the target language). When absent (nil), the
	// gender is either not relevant or has yet to be provided.
	//
	// The tag carries ",attr" because g_gend is an attribute of the gloss
	// element; without it encoding/xml looks for a child element named
	// g_gend and the field is never populated.
	Gender *string `xml:"g_gend,attr"`

	// Type is the g_type attribute, added in jmdict Rev 1.09. At present
	// the values used are "lit", "fig", "expl" and "tm". It is proposed to
	// add a "descr" value to indicate a gloss which is a description of the
	// Japanese term rather than a translation or an explanation of the
	// meaning.
	Type *string `xml:"g_type,attr"`
}
type JmdictKanji ¶
// JmdictKanji is one kanji element (k_ele) of a JMdict entry.
type JmdictKanji struct {
	// Expression is the keb: a word or short phrase in Japanese written
	// with at least one non-kana character (usually kanji, but other
	// characters are possible). Valid characters are kanji, kana, related
	// characters such as chouon and kurikaeshi and, in exceptional cases,
	// letters from other alphabets.
	Expression string `xml:"keb"`

	// Information holds coded information relating specifically to the
	// orthography of the keb. It typically flags some unusual aspect, such
	// as okurigana irregularity.
	Information []string `xml:"ke_inf"`

	// Priorities, like the equivalent re_pri field, records information
	// about the relative priority of the entry. Each code says that the
	// word appears in a particular reference, which can be taken as an
	// indication of how frequently the word is used. It is intended for
	// applications that want to concentrate on entries of a particular
	// priority, or that generate subset files.
	//
	// The current values are:
	//   - news1/2: appears in the "wordfreq" file compiled by Alexandre
	//     Girardi from the Mainichi Shimbun. (See the Monash ftp archive
	//     for a copy.) Words in the first 12,000 in that file are marked
	//     "news1" and words in the second 12,000 are marked "news2".
	//   - ichi1/2: appears in the "Ichimango goi bunruishuu", Senmon
	//     Kyouiku Publishing, Tokyo, 1998. (The entries marked "ichi2"
	//     were demoted from ichi1 because they were observed to have low
	//     frequencies in the WWW and newspapers.)
	//   - spec1 and spec2: a small number of words use this marker when
	//     they are detected as being common, but are not included in
	//     other lists.
	//   - gai1/2: common loanwords, based on the wordfreq file.
	//   - nfxx: an indicator of frequency-of-use ranking in the wordfreq
	//     file. "xx" is the number of the set of 500 words in which the
	//     entry can be found, with "01" assigned to the first 500, "02"
	//     to the second, and so on.
	//
	// (Entries with news1, ichi1, spec1 and gai1 values are marked with a
	// "(P)" in the EDICT and EDICT2 files.)
	//
	// Both the kanji and the reading elements are tagged because a
	// priority is sometimes associated only with a particular
	// kanji/reading pair.
	Priorities []string `xml:"ke_pri"`
}
type JmdictReading ¶
// JmdictReading is one reading element (r_ele) of a JMdict entry.
type JmdictReading struct {
	// Reading is the reb. Its content is restricted to kana and related
	// characters such as chouon and kurikaeshi. Kana usage is consistent
	// between the keb and reb elements; e.g. if the keb contains katakana,
	// so too will the reb.
	Reading string `xml:"reb"`

	// NoKanji is set when the re_nokanji element is present. That element,
	// which usually has a null value, indicates that the reb, while
	// associated with the keb, cannot be regarded as a true reading of the
	// kanji. It is typically used for words such as foreign place names,
	// gairaigo which can be in kanji or katakana, etc.
	NoKanji *string `xml:"re_nokanji"`

	// Restrictions indicates that the reading only applies to a subset of
	// the keb elements in the entry. When empty, the reading applies to
	// all kanji elements. Each value must exactly match the content of one
	// of the keb elements.
	Restrictions []string `xml:"re_restr"`

	// Information holds general coded information pertaining to this
	// specific reading, typically flagging some unusual aspect of it.
	Information []string `xml:"re_inf"`

	// Priorities carries the re_pri codes, which mirror the ke_pri codes
	// recorded on kanji elements.
	Priorities []string `xml:"re_pri"`
}
type JmdictSense ¶
type JmdictSense struct { // These elements, if present, indicate that the sense is restricted // to the lexeme represented by the keb and/or reb. RestrictedKanji []string `xml:"stagk"` RestrictedReadings []string `xml:"stagr"` // This element is used to indicate a cross-reference to another // entry with a similar or related meaning or sense. The content of // this element is typically a keb or reb element in another entry. In some // cases a keb will be followed by a reb and/or a sense number to provide // a precise target for the cross-reference. Where this happens, a JIS // "centre-dot" (0x2126) is placed between the components of the // cross-reference. References []string `xml:"xref"` // This element is used to indicate another entry which is an // antonym of the current entry/sense. The content of this element // must exactly match that of a keb or reb element in another entry. Antonyms []string `xml:"ant"` // Part-of-speech information about the entry/sense. Should use // appropriate entity codes. In general where there are multiple senses // in an entry, the part-of-speech of an earlier sense will apply to // later senses unless there is a new part-of-speech indicated. PartsOfSpeech []string `xml:"pos"` // Information about the field of application of the entry/sense. // When absent, general application is implied. Entity coding for // specific fields of application. Fields []string `xml:"field"` // This element is used for other relevant information about // the entry/sense. As with part-of-speech, information will usually // apply to several senses. Misc []string `xml:"misc"` // This element records the information about the source // language(s) of a loan-word/gairaigo. If the source language is other // than English, the language is indicated by the xml:lang attribute. // The element value (if any) is the source word or phrase. 
SourceLanguages []JmdictSource `xml:"lsource"` // For words specifically associated with regional dialects in // Japanese, the entity code for that dialect, e.g. ksb for Kansaiben. Dialects []string `xml:"dial"` // The sense-information elements provided for additional // information to be recorded about a sense. Typical usage would // be to indicate such things as level of currency of a sense, the // regional variations, etc. Information []string `xml:"s_inf"` // Within each sense will be one or more "glosses", i.e. // target-language words or phrases which are equivalents to the // Japanese word. This element would normally be present, however it // may be omitted in entries which are purely for a cross-reference. Glossary []JmdictGlossary `xml:"gloss"` // Some JMdict entries can contain 0 or more examples Examples []JmdictExample `xml:"example"` }
type JmdictSource ¶
// JmdictSource describes the source language of a loan-word (lsource).
type JmdictSource struct {
	// Content is the character data of the lsource element.
	Content string `xml:",chardata"`

	// Language is the xml:lang attribute, which defines the language(s)
	// from which a loanword is drawn. It is coded using the three-letter
	// language code from the ISO 639-2 standard; the bibliographic (B)
	// codes are used. When absent (nil), the value "eng" (i.e. English) is
	// the default.
	Language *string `xml:"lang,attr"`

	// Type is the ls_type attribute, which indicates whether the lsource
	// element fully or partially describes the source word or phrase of
	// the loanword. If absent (nil), the implied value is "full";
	// otherwise it contains "part".
	Type *string `xml:"ls_type,attr"`

	// Wasei is the ls_wasei attribute, which indicates that the Japanese
	// word has been constructed from words in the source language, and not
	// from an actual phrase in that language. Most commonly used to
	// indicate "waseieigo".
	Wasei string `xml:"ls_wasei,attr"`
}
type Jmnedict ¶
type Jmnedict struct { // Entries consist of kanji elements, reading elements // name translation elements. Each entry must have at // least one reading element and one sense element. Others are optional. Entries []JmnedictEntry `xml:"entry"` }
type JmnedictEntry ¶
type JmnedictEntry struct { // A unique numeric sequence number for each entry Sequence int `xml:"ent_seq"` // The kanji element, or in its absence, the reading element, is // the defining component of each entry. // The overwhelming majority of entries will have a single kanji // element associated with an entity name in Japanese. Where there are // multiple kanji elements within an entry, they will be orthographical // variants of the same word, either using variations in okurigana, or // alternative and equivalent kanji. Common "mis-spellings" may be // included, provided they are associated with appropriate information // fields. Synonyms are not included; they may be indicated in the // cross-reference field associated with the sense element. Kanji []JmnedictKanji `xml:"k_ele"` // The reading element typically contains the valid readings // of the word(s) in the kanji element using modern kanadzukai. // Where there are multiple reading elements, they will typically be // alternative readings of the kanji element. In the absence of a // kanji element, i.e. in the case of a word or phrase written // entirely in kana, these elements will define the entry. Readings []JmnedictReading `xml:"r_ele"` // The trans element will record the translational equivalent // of the Japanese name, plus other related information. Translations []JmnedictTranslation `xml:"trans"` }
type JmnedictKanji ¶
// JmnedictKanji is one kanji element (k_ele) of a JMnedict entry.
type JmnedictKanji struct {
	// Expression is the keb: an entity name in Japanese written with at
	// least one non-kana character (usually kanji, but other characters
	// are possible). Valid characters are kanji, kana, related characters
	// such as chouon and kurikaeshi and, in exceptional cases, letters
	// from other alphabets.
	Expression string `xml:"keb"`

	// Information holds coded information relating specifically to the
	// orthography of the keb. It typically flags some unusual aspect, such
	// as okurigana irregularity.
	Information []string `xml:"ke_inf"`

	// Priorities, like the equivalent re_pri field, records information
	// about the relative priority of the entry. It is intended for
	// applications that want to concentrate on entries of a particular
	// priority, or that generate subset files. Both the kanji and the
	// reading elements are tagged because a priority is sometimes
	// associated only with a particular kanji/reading pair.
	Priorities []string `xml:"ke_pri"`
}
type JmnedictReading ¶
// JmnedictReading is one reading element (r_ele) of a JMnedict entry.
type JmnedictReading struct {
	// Reading is the reb. Its content is restricted to kana and related
	// characters such as chouon and kurikaeshi. Kana usage is consistent
	// between the keb and reb elements; e.g. if the keb contains katakana,
	// so too will the reb.
	Reading string `xml:"reb"`

	// Restrictions indicates that the reading only applies to a subset of
	// the keb elements in the entry. When empty, the reading applies to
	// all kanji elements. Each value must exactly match the content of one
	// of the keb elements.
	Restrictions []string `xml:"re_restr"`

	// Information holds general coded information pertaining to this
	// specific reading, typically flagging some unusual aspect of it.
	Information []string `xml:"re_inf"`

	// Priorities carries the re_pri codes, which mirror the ke_pri codes
	// recorded on kanji elements.
	Priorities []string `xml:"re_pri"`
}
type JmnedictTranslation ¶
// JmnedictTranslation is one trans element of a JMnedict entry.
type JmnedictTranslation struct {
	// NameTypes holds the type of name, recorded in the appropriate entity
	// codes.
	NameTypes []string `xml:"name_type"`

	// References holds cross-references to other entries with a similar or
	// related meaning or sense. The content is typically a keb or reb
	// element of another entry. In some cases a keb is followed by a reb
	// and/or a sense number to give a precise target; where this happens,
	// a JIS "centre-dot" (0x2126) is placed between the components of the
	// cross-reference.
	References []string `xml:"xref"`

	// Translations holds the actual translations of the name, usually as a
	// transcription into the target language.
	Translations []string `xml:"trans_det"`

	// Language is the xml:lang attribute, which defines the target
	// language of the translated name. It is coded using the three-letter
	// language code from the ISO 639-2 standard; the bibliographic (B)
	// codes are used. When absent (nil), the value "eng" (i.e. English) is
	// the default.
	//
	// NOTE(review): this tag reads the attribute from the trans element
	// itself. If the data carries xml:lang on trans_det instead, this
	// field stays nil - confirm against the JMnedict DTD.
	Language *string `xml:"lang,attr"`
}
type Kanjidic ¶
type Kanjidic struct { // The single header element will contain identification information // about the version of the file Header KanjidicHeader `xml:"header"` Characters []KanjidicCharacter `xml:"character"` }
type KanjidicCharacter ¶
type KanjidicCharacter struct { // The character itself in UTF8 coding. Literal string `xml:"literal"` // The codepoint element states the code of the character in the various // character set standards. Codepoint []KanjidicCodepoint `xml:"codepoint>cp_value"` // The radical number, in the range 1 to 214. The particular // classification type is stated in the rad_type attribute. Radical []KanjidicRadical `xml:"radical>rad_value"` Misc KanjidicMisc `xml:"misc"` // This element contains the index numbers and similar unstructured // information such as page numbers in a number of published dictionaries, // and instructional books on kanji. DictionaryNumbers []KanjidicDicNumber `xml:"dic_number>dic_ref"` // These codes contain information relating to the glyph, and can be used // for finding a required kanji. The type of code is defined by the // qc_type attribute. QueryCode []KanjidicQueryCode `xml:"query_code>q_code"` // The readings for the kanji in several languages, and the meanings, also // in several languages. The readings and meanings are grouped to enable // the handling of the situation where the meaning is differentiated by // reading. [T1] ReadingMeaning *KanjidicReadingMeaning `xml:"reading_meaning"` }
type KanjidicCodepoint ¶
// KanjidicCodepoint is one cp_value element: the code of a character in
// one character set standard.
type KanjidicCodepoint struct {
	// Value is the codepoint of the character in a particular standard.
	// The standard is identified by the cp_type attribute.
	Value string `xml:",chardata"`

	// Type is the cp_type attribute, which states the coding standard
	// applying to the element. The values assigned so far are:
	//   jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
	//   jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
	//   jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
	//   ucs    - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
	Type string `xml:"cp_type,attr"`
}
type KanjidicDicNumber ¶
// KanjidicDicNumber is one dic_ref element: a reference to a kanji in a
// published dictionary or instructional book.
type KanjidicDicNumber struct {
	// Value is the character data of the dic_ref element.
	Value string `xml:",chardata"`

	// Type is the dr_type attribute, which defines the dictionary or
	// reference book, etc. to which the dic_ref element applies. The
	// initial allocation is:
	//   nelson_c - "Modern Reader's Japanese-English Character Dictionary",
	//     edited by Andrew Nelson (now published as the "Classic" Nelson).
	//   nelson_n - "The New Nelson Japanese-English Character Dictionary",
	//     edited by John Haig.
	//   halpern_njecd - "New Japanese-English Character Dictionary",
	//     edited by Jack Halpern.
	//   halpern_kkd - "Kodansha Kanji Dictionary", (2nd Ed. of the NJECD)
	//     edited by Jack Halpern.
	//   halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by
	//     Jack Halpern.
	//   halpern_kkld_2ed - "Kanji Learners Dictionary" (Kodansha), 2nd
	//     edition (2013) edited by Jack Halpern.
	//   heisig - "Remembering The Kanji" by James Heisig.
	//   heisig6 - "Remembering The Kanji, Sixth Ed." by James Heisig.
	//   gakken - "A New Dictionary of Kanji Usage" (Gakken)
	//   oneill_names - "Japanese Names", by P.G. O'Neill.
	//   oneill_kk - "Essential Kanji" by P.G. O'Neill.
	//   moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
	//     additional attributes are used: m_vol: the volume of the
	//     dictionary in which the kanji is found, and m_page: the page
	//     number in the volume.
	//   henshall - "A Guide To Remembering Japanese Characters" by
	//     Kenneth G. Henshall.
	//   sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
	//   sh_kk2 - "Kanji and Kana" by Spahn and Hadamitzky (2011 edition).
	//   sakade - "A Guide To Reading and Writing Japanese" edited by
	//     Florence Sakade.
	//   jf_cards - Japanese Kanji Flashcards, by Max Hodges and
	//     Tomoko Okazaki. (Series 1)
	//   henshall3 - "A Guide To Reading and Writing Japanese" 3rd
	//     edition, edited by Henshall, Seeley and De Groot.
	//   tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
	//   crowley - "The Kanji Way to Japanese Language Power" by
	//     Dale Crowley.
	//   kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
	//   busy_people - "Japanese For Busy People" vols I-III, published
	//     by the AJLT. The codes are the volume.chapter.
	//   kodansha_compact - the "Kodansha Compact Kanji Guide".
	//   maniette - codes from Yves Maniette's "Les Kanjis dans la tete"
	//     French adaptation of Heisig.
	Type string `xml:"dr_type,attr"`

	// Volume is the m_vol attribute used with dr_type "moro": the volume
	// of the dictionary in which the kanji is found.
	Volume string `xml:"m_vol,attr"`

	// Page is the m_page attribute used with dr_type "moro": the page
	// number in the volume.
	Page string `xml:"m_page,attr"`
}
type KanjidicHeader ¶
// KanjidicHeader carries the identification information found in the
// header element of a kanjidic2 document.
type KanjidicHeader struct {
	// FileVersion denotes the version of the kanjidic2 structure, as more
	// than one version may exist.
	FileVersion string `xml:"file_version"`

	// DatabaseVersion is the version of the file, in the format YYYY-NN,
	// where NN is a number starting with 01 for the first version released
	// in a calendar year, then increasing for each version in that year.
	DatabaseVersion string `xml:"database_version"`

	// DateOfCreation is the date the file was created, in international
	// format (YYYY-MM-DD).
	DateOfCreation string `xml:"date_of_creation"`
}
type KanjidicMeaning ¶
// KanjidicMeaning is one meaning element of a kanji.
type KanjidicMeaning struct {
	// Meaning is the meaning associated with the kanji.
	Meaning string `xml:",chardata"`

	// Language is the m_lang attribute, which defines the target language
	// of the meaning. It is coded using the two-letter language code from
	// the ISO 639-1 standard. When absent (nil), the value "en" (i.e.
	// English) is implied. [{}]
	Language *string `xml:"m_lang,attr"`
}
type KanjidicMisc ¶
type KanjidicMisc struct { // The kanji grade level. 1 through 6 indicates a Kyouiku kanji // and the grade in which the kanji is taught in Japanese schools. // 8 indicates it is one of the remaining Jouyou Kanji to be learned // in junior high school, and 9 or 10 indicates it is a Jinmeiyou (for use // in names) kanji. [G] Grade *string `xml:"grade"` // The stroke count of the kanji, including the radical. If more than // one, the first is considered the accepted count, while subsequent ones // are common miscounts. (See Appendix E. of the KANJIDIC documentation // for some of the rules applied when counting strokes in some of the // radicals.) [S] StrokeCounts []string `xml:"stroke_count"` // Either a cross-reference code to another kanji, usually regarded as a // variant, or an alternative indexing code for the current kanji. // The type of variant is given in the var_type attribute. Variants []KanjidicVariant `xml:"variant"` // A frequency-of-use ranking. The 2,500 most-used characters have a // ranking; those characters that lack this field are not ranked. The // frequency is a number from 1 to 2,500 that expresses the relative // frequency of occurrence of a character in modern Japanese. This is // based on a survey in newspapers, so it is biassed towards kanji // used in newspaper articles. The discrimination between the less // frequently used kanji is not strong. (Actually there are 2,501 // kanji ranked as there was a tie.) Frequency *string `xml:"freq"` // When the kanji is itself a radical and has a name, this element // contains the name (in hiragana.) [T2] RadicalName []string `xml:"rad_name"` // The (former) Japanese Language Proficiency test level for this kanji. // Values range from 1 (most advanced) to 4 (most elementary). This field // does not appear for kanji that were not required for any JLPT level. // Note that the JLPT test levels changed in 2010, with a new 5-level // system (N1 to N5) being introduced. 
No official kanji lists are // available for the new levels. The new levels are regarded as // being similar to the old levels except that the old level 2 is // now divided between N2 and N3. JlptLevel *string `xml:"jlpt"` }
type KanjidicQueryCode ¶
// KanjidicQueryCode is one q_code element: a code relating to the glyph
// that can be used for finding a kanji.
type KanjidicQueryCode struct {
	// Value is the character data of the q_code element.
	Value string `xml:",chardata"`

	// Type is the qc_type attribute. Documented values include:
	//   deroo - the codes developed by the late Father Joseph De Roo, and
	//     published in his book "2001 Kanji" (Bonjinsha). Fr De Roo gave
	//     his permission for these codes to be included. [DR]
	//   misclass - a possible misclassification of the kanji according to
	//     one of the code types. (See the "Z" codes in the KANJIDIC
	//     documentation for more details.)
	Type string `xml:"qc_type,attr"`

	// Misclassification is the skip_misclass attribute. Its values
	// indicate the type of misclassification:
	//   - posn - a mistake in the division of the kanji
	//   - stroke_count - a mistake in the number of strokes
	//   - stroke_and_posn - mistakes in both division and strokes
	//   - stroke_diff - ambiguous stroke counts depending on glyph
	Misclassification string `xml:"skip_misclass,attr"`
}
type KanjidicRadical ¶
// KanjidicRadical is one rad_value element of a character.
type KanjidicRadical struct {
	// Value is the character data of the rad_value element.
	Value string `xml:",chardata"`

	// Type is the rad_type attribute, which states the type of radical
	// classification:
	//   classical - as recorded in the KangXi Zidian.
	//   nelson_c - as used in the Nelson "Modern Japanese-English
	//     Character Dictionary" (i.e. the Classic, not the New Nelson).
	//     This will only be used where Nelson reclassified the kanji.
	Type string `xml:"rad_type,attr"`
}
type KanjidicReading ¶
// KanjidicReading is one reading element: a reading or pronunciation of a
// kanji together with the attributes that qualify it.
type KanjidicReading struct {
	// Value is the character data of the reading element.
	Value string `xml:",chardata"`

	// Type is the r_type attribute, which defines the type of reading in
	// the reading element. The current values are:
	//   pinyin - the modern PinYin romanization of the Chinese reading of
	//     the kanji. The tones are represented by a concluding digit. [Y]
	//   korean_r - the romanized form of the Korean reading(s) of the
	//     kanji. The readings are in the (Republic of Korea) Ministry of
	//     Education style of romanization. [W]
	//   korean_h - the Korean reading(s) of the kanji in hangul.
	//   ja_on - the "on" Japanese reading of the kanji, in katakana.
	//     Another attribute r_status, if present, will indicate with a
	//     value of "jy" whether the reading is approved for a "Jouyou
	//     kanji". A further attribute on_type, if present, will indicate
	//     with a value of kan, go, tou or kan'you the type of on-reading.
	//   ja_kun - the "kun" Japanese reading of the kanji, usually in
	//     hiragana. Where relevant the okurigana is also included
	//     separated by a ".". Readings associated with prefixes and
	//     suffixes are marked with a "-". A second attribute r_status, if
	//     present, will indicate with a value of "jy" whether the reading
	//     is approved for a "Jouyou kanji".
	Type string `xml:"r_type,attr"`

	// OnType is the on_type attribute described under ja_on: the type of
	// on-reading (kan, go, tou or kan'you). Nil when the attribute is
	// absent.
	//
	// The tag carries ",attr" because on_type is an attribute of the
	// reading element; without it encoding/xml looks for a child element
	// and the field is never populated.
	OnType *string `xml:"on_type,attr"`

	// JouyouStatus is the r_status attribute described under ja_on and
	// ja_kun: "jy" when the reading is approved for a "Jouyou kanji". Nil
	// when the attribute is absent. Tagged ",attr" for the same reason as
	// OnType.
	JouyouStatus *string `xml:"r_status,attr"`
}
type KanjidicReadingMeaning ¶
type KanjidicReadingMeaning struct { // The reading element contains the reading or pronunciation // of the kanji. Readings []KanjidicReading `xml:"rmgroup>reading"` // The meaning associated with the kanji. Meanings []KanjidicMeaning `xml:"rmgroup>meaning"` // Japanese readings that are now only associated with names. Nanori []string `xml:"nanori"` }
type KanjidicVariant ¶
// KanjidicVariant is one variant element: a cross-reference code to
// another kanji, or an alternative indexing code for the current one.
type KanjidicVariant struct {
	// Value is the character data of the variant element.
	Value string `xml:",chardata"`

	// Type is the var_type attribute, which indicates the type of variant
	// code. The current values are:
	//   jis208 - in JIS X 0208 - kuten coding
	//   jis212 - in JIS X 0212 - kuten coding
	//   jis213 - in JIS X 0213 - kuten coding
	//     (most of the above relate to "shinjitai/kyuujitai"
	//     alternative character glyphs)
	//   deroo - De Roo number - numeric
	//   njecd - Halpern NJECD index number - numeric
	//   s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
	//   nelson_c - "Classic" Nelson - numeric
	//   oneill - Japanese Names (O'Neill) - numeric
	//   ucs - Unicode codepoint - hex
	//
	// The tag carries ",attr" because var_type is an attribute of the
	// variant element; without it encoding/xml looks for a child element
	// and the field is never populated.
	Type string `xml:"var_type,attr"`
}
Click to show internal directories.
Click to hide internal directories.