jmdict

package module

v0.0.0-...-83a5a2b Latest Latest Go to latest Published: Dec 31, 2023 License: MIT Imports: 3 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

git.foosoft.net/alex/jmdict

Links

Open Source Insights

README ¶

JMDict

JMDict is a simple library written in Go for parsing the raw data files for the JMDict (vocabulary), JMnedict (names), and KANJIDIC (Kanji) dictionaries. As far as I know, these are the only publicly available Japanese dictionaries and are therefore used by a variety of tools (including Yomichan-Import from this site).

The XML format used to store dictionary entries and entity data was deceptively annoying to work with, leading to the creation of this library. Please see the documentation page for a technical overview of how to use this library.

Please import this library from foosoft.net/projects/jmdict and not the GitHub path.

Documentation ¶

Index ¶

type Jmdict
- func LoadJmdict(reader io.Reader) (Jmdict, map[string]string, error)
- func LoadJmdictNoTransform(reader io.Reader) (Jmdict, map[string]string, error)
type JmdictEntry
type JmdictExample
type JmdictExampleSentence
type JmdictExampleSource
type JmdictGlossary
type JmdictKanji
type JmdictReading
type JmdictSense
type JmdictSource
type Jmnedict
- func LoadJmnedict(reader io.Reader) (Jmnedict, map[string]string, error)
- func LoadJmnedictNoTransform(reader io.Reader) (Jmnedict, map[string]string, error)
type JmnedictEntry
type JmnedictKanji
type JmnedictReading
type JmnedictTranslation
type Kanjidic
- func LoadKanjidic(reader io.Reader) (Kanjidic, error)
type KanjidicCharacter
type KanjidicCodepoint
type KanjidicDicNumber
type KanjidicHeader
type KanjidicMeaning
type KanjidicMisc
type KanjidicQueryCode
type KanjidicRadical
type KanjidicReading
type KanjidicReadingMeaning
type KanjidicVariant

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type Jmdict ¶

// Jmdict is the container for a decoded JMdict (vocabulary) document. It is
// the value returned by LoadJmdict and LoadJmdictNoTransform.
type Jmdict struct {
	// Entries holds one item per <entry> element.
	// Entries consist of kanji elements, reading elements,
	// general information and sense elements. Each entry must have at
	// least one reading element and one sense element. Others are optional.
	Entries []JmdictEntry `xml:"entry"`
}

func LoadJmdict ¶

func LoadJmdict(reader io.Reader) (Jmdict, map[string]string, error)

func LoadJmdictNoTransform ¶

func LoadJmdictNoTransform(reader io.Reader) (Jmdict, map[string]string, error)

type JmdictEntry ¶

// JmdictEntry is a single JMdict <entry>: its sequence number plus the
// kanji, reading and sense elements that belong to it.
type JmdictEntry struct {
	// A unique numeric sequence number for each entry (<ent_seq>).
	Sequence int `xml:"ent_seq"`

	// The kanji element (<k_ele>), or in its absence, the reading element,
	// is the defining component of each entry.
	// The overwhelming majority of entries will have a single kanji
	// element associated with a word in Japanese. Where there are
	// multiple kanji elements within an entry, they will be orthographical
	// variants of the same word, either using variations in okurigana, or
	// alternative and equivalent kanji. Common "mis-spellings" may be
	// included, provided they are associated with appropriate information
	// fields. Synonyms are not included; they may be indicated in the
	// cross-reference field associated with the sense element.
	Kanji []JmdictKanji `xml:"k_ele"`

	// The reading element (<r_ele>) typically contains the valid readings
	// of the word(s) in the kanji element using modern kanadzukai.
	// Where there are multiple reading elements, they will typically be
	// alternative readings of the kanji element. In the absence of a
	// kanji element, i.e. in the case of a word or phrase written
	// entirely in kana, these elements will define the entry.
	Readings []JmdictReading `xml:"r_ele"`

	// The sense element (<sense>) will record the translational equivalent
	// of the Japanese word, plus other related information. Where there
	// are several distinctly different meanings of the word, multiple
	// sense elements will be employed.
	Sense []JmdictSense `xml:"sense"`
}

type JmdictExample ¶

// JmdictExample is one <example> element attached to a sense: where the
// example came from, the term it illustrates, and the example sentences.
type JmdictExample struct {
	// Each example has a source element (<ex_srce>) that indicates where
	// the example comes from. The source is typically the Tatoeba Project.
	Srce JmdictExampleSource `xml:"ex_srce"`

	// The term associated with this example (<ex_text>).
	Text string `xml:"ex_text"`

	// The example sentences (<ex_sent>), one item per element.
	Sentences []JmdictExampleSentence `xml:"ex_sent"`
}

type JmdictExampleSentence ¶

// JmdictExampleSentence is a single example sentence together with the
// language it is written in.
type JmdictExampleSentence struct {
	// The language of the example sentence, read from the "lang" attribute.
	Lang string `xml:"lang,attr"`

	// The example sentence text (the element's character data).
	Text string `xml:",chardata"`
}

type JmdictExampleSource ¶

// JmdictExampleSource identifies the origin of an example: an id (the
// element's character data) and the kind of source it refers to.
type JmdictExampleSource struct {
	// The id of the example within the source.
	ID string `xml:",chardata"`

	// The source type, read from the "exsrc_type" attribute
	// (e.g. 'tat' for Tatoeba).
	SrcType string `xml:"exsrc_type,attr"`
}

type JmdictGlossary ¶

// JmdictGlossary is a single <gloss> element: the target-language text plus
// its optional language, gender and type attributes. Optional attributes are
// pointers so that "absent" (nil) can be told apart from "present but empty".
type JmdictGlossary struct {
	// The gloss text itself (the element's character data).
	Content string `xml:",chardata"`

	// The xml:lang attribute defines the target language of the
	// gloss. It will be coded using the three-letter language code from
	// the ISO 639 standard. When absent, the value "eng" (i.e. English)
	// is the default value.
	Language *string `xml:"lang,attr"`

	// The g_gend attribute defines the gender of the gloss (typically
	// a noun in the target language). When absent, the gender is either
	// not relevant or has yet to be provided.
	//
	// The ",attr" option is required: without it encoding/xml looks for a
	// <g_gend> child element and this field is never populated.
	Gender *string `xml:"g_gend,attr"`

	// g_type attribute added in jmdict Rev 1.09
	// At present the values used are "lit", "fig", "expl" and "tm". It is
	// proposed to add a "descr" value to indicate a gloss which is a
	// description of the Japanese term rather than a translation or an
	// explanation of the meaning.
	Type *string `xml:"g_type,attr"`
}

type JmdictKanji ¶

// JmdictKanji is a single kanji element (<k_ele>) of a JMdict entry: the
// written expression plus its orthography and priority codes.
type JmdictKanji struct {
	// This element (<keb>) will contain a word or short phrase in Japanese
	// which is written using at least one non-kana character (usually kanji,
	// but can be other characters). The valid characters are
	// kanji, kana, related characters such as chouon and kurikaeshi, and
	// in exceptional cases, letters from other alphabets.
	Expression string `xml:"keb"`

	// This is a coded information field (<ke_inf>) related specifically to
	// the orthography of the keb, and will typically indicate some unusual
	// aspect, such as okurigana irregularity.
	Information []string `xml:"ke_inf"`

	// This (<ke_pri>) and the equivalent re_pri field are provided to record
	// information about the relative priority of the entry, and consist
	// of codes indicating the word appears in various references which
	// can be taken as an indication of the frequency with which the word
	// is used. This field is intended for use either by applications which
	// want to concentrate on entries of a particular priority, or to
	// generate subset files.
	// The current values in this field are:
	// - news1/2: appears in the "wordfreq" file compiled by Alexandre Girardi
	// from the Mainichi Shimbun. (See the Monash ftp archive for a copy.)
	// Words in the first 12,000 in that file are marked "news1" and words
	// in the second 12,000 are marked "news2".
	// - ichi1/2: appears in the "Ichimango goi bunruishuu", Senmon Kyouiku
	// Publishing, Tokyo, 1998. (The entries marked "ichi2" were
	// demoted from ichi1 because they were observed to have low
	// frequencies in the WWW and newspapers.)
	// - spec1 and spec2: a small number of words use this marker when they
	// are detected as being common, but are not included in other lists.
	// - gai1/2: common loanwords, based on the wordfreq file.
	// - nfxx: this is an indicator of frequency-of-use ranking in the
	// wordfreq file. "xx" is the number of the set of 500 words in which
	// the entry can be found, with "01" assigned to the first 500, "02"
	// to the second, and so on. (The entries with news1, ichi1, spec1 and
	// gai1 values are marked with a "(P)" in the EDICT and EDICT2
	// files.)
	// The reason both the kanji and reading elements are tagged is because
	// on occasions a priority is only associated with a particular
	// kanji/reading pair.
	Priorities []string `xml:"ke_pri"`
}

type JmdictReading ¶

// JmdictReading is a single reading element (<r_ele>) of a JMdict entry:
// the kana reading plus its restriction, information and priority fields.
type JmdictReading struct {
	// This element (<reb>) content is restricted to kana and related
	// characters such as chouon and kurikaeshi. Kana usage will be
	// consistent between the keb and reb elements; e.g. if the keb
	// contains katakana, so too will the reb.
	Reading string `xml:"reb"`

	// This element (<re_nokanji>), which will usually have a null value,
	// indicates that the reb, while associated with the keb, cannot be
	// regarded as a true reading of the kanji. It is typically used for
	// words such as foreign place names, gairaigo which can be in kanji or
	// katakana, etc. The field is a pointer; it stays nil when the element
	// is not present.
	NoKanji *string `xml:"re_nokanji"`

	// This element (<re_restr>) is used to indicate when the reading only
	// applies to a subset of the keb elements in the entry. In its absence,
	// all readings apply to all kanji elements. The contents of this element
	// must exactly match those of one of the keb elements.
	Restrictions []string `xml:"re_restr"`

	// General coded information (<re_inf>) pertaining to the specific
	// reading. Typically it will be used to indicate some unusual aspect of
	// the reading.
	Information []string `xml:"re_inf"`

	// Priority codes (<re_pri>). See the comment on ke_pri
	// (JmdictKanji.Priorities).
	Priorities []string `xml:"re_pri"`
}

type JmdictSense ¶

// JmdictSense is a single <sense> element of a JMdict entry: the glosses
// for one meaning of the word together with its restrictions,
// cross-references, grammatical codes, loan-word sources and examples.
type JmdictSense struct {
	// These elements (<stagk>, <stagr>), if present, indicate that the sense
	// is restricted to the lexeme represented by the keb and/or reb.
	RestrictedKanji    []string `xml:"stagk"`
	RestrictedReadings []string `xml:"stagr"`

	// This element (<xref>) is used to indicate a cross-reference to another
	// entry with a similar or related meaning or sense. The content of
	// this element is typically a keb or reb element in another entry. In some
	// cases a keb will be followed by a reb and/or a sense number to provide
	// a precise target for the cross-reference. Where this happens, a JIS
	// "centre-dot" (0x2126) is placed between the components of the
	// cross-reference.
	References []string `xml:"xref"`

	// This element (<ant>) is used to indicate another entry which is an
	// antonym of the current entry/sense. The content of this element
	// must exactly match that of a keb or reb element in another entry.
	Antonyms []string `xml:"ant"`

	// Part-of-speech information (<pos>) about the entry/sense. Should use
	// appropriate entity codes. In general where there are multiple senses
	// in an entry, the part-of-speech of an earlier sense will apply to
	// later senses unless there is a new part-of-speech indicated.
	PartsOfSpeech []string `xml:"pos"`

	// Information about the field of application (<field>) of the
	// entry/sense. When absent, general application is implied. Entity
	// coding for specific fields of application.
	Fields []string `xml:"field"`

	// This element (<misc>) is used for other relevant information about
	// the entry/sense. As with part-of-speech, information will usually
	// apply to several senses.
	Misc []string `xml:"misc"`

	// This element (<lsource>) records the information about the source
	// language(s) of a loan-word/gairaigo. If the source language is other
	// than English, the language is indicated by the xml:lang attribute.
	// The element value (if any) is the source word or phrase.
	SourceLanguages []JmdictSource `xml:"lsource"`

	// For words specifically associated with regional dialects in
	// Japanese, the entity code for that dialect (<dial>), e.g. ksb for
	// Kansaiben.
	Dialects []string `xml:"dial"`

	// The sense-information elements (<s_inf>) provided for additional
	// information to be recorded about a sense. Typical usage would
	// be to indicate such things as level of currency of a sense, the
	// regional variations, etc.
	Information []string `xml:"s_inf"`

	// Within each sense will be one or more "glosses" (<gloss>), i.e.
	// target-language words or phrases which are equivalents to the
	// Japanese word. This element would normally be present, however it
	// may be omitted in entries which are purely for a cross-reference.
	Glossary []JmdictGlossary `xml:"gloss"`

	// Examples attached to this sense (<example>); an entry's sense may
	// contain zero or more of them.
	Examples []JmdictExample `xml:"example"`
}

type JmdictSource ¶

// JmdictSource is a single <lsource> element: the source word or phrase of
// a loan-word plus the attributes describing its language and nature.
type JmdictSource struct {
	// The source word or phrase, if any (the element's character data).
	Content string `xml:",chardata"`

	// The xml:lang attribute defines the language(s) from which
	// a loanword is drawn. It will be coded using the three-letter language
	// code from the ISO 639-2 standard. When absent, the value "eng" (i.e.
	// English) is the default value. The bibliographic (B) codes are used.
	Language *string `xml:"lang,attr"`

	// The ls_type attribute indicates whether the lsource element
	// fully or partially describes the source word or phrase of the
	// loanword. If absent, it will have the implied value of "full".
	// Otherwise it will contain "part".
	Type *string `xml:"ls_type,attr"`

	// The ls_wasei attribute indicates that the Japanese word
	// has been constructed from words in the source language, and
	// not from an actual phrase in that language. Most commonly used to
	// indicate "waseieigo". Unlike the other attributes this is a plain
	// string, so an absent attribute decodes as "".
	Wasei string `xml:"ls_wasei,attr"`
}

type Jmnedict ¶

// Jmnedict is the container for a decoded JMnedict (names) document. It is
// the value returned by LoadJmnedict and LoadJmnedictNoTransform.
type Jmnedict struct {
	// Entries holds one item per <entry> element.
	// Entries consist of kanji elements, reading elements and
	// name translation elements. Each entry must have at
	// least one reading element and one sense element. Others are optional.
	Entries []JmnedictEntry `xml:"entry"`
}

func LoadJmnedict ¶

func LoadJmnedict(reader io.Reader) (Jmnedict, map[string]string, error)

func LoadJmnedictNoTransform ¶

func LoadJmnedictNoTransform(reader io.Reader) (Jmnedict, map[string]string, error)

type JmnedictEntry ¶

// JmnedictEntry is a single JMnedict <entry>: its sequence number plus the
// kanji, reading and translation elements that belong to it.
type JmnedictEntry struct {
	// A unique numeric sequence number for each entry (<ent_seq>).
	Sequence int `xml:"ent_seq"`

	// The kanji element (<k_ele>), or in its absence, the reading element,
	// is the defining component of each entry.
	// The overwhelming majority of entries will have a single kanji
	// element associated with an entity name in Japanese. Where there are
	// multiple kanji elements within an entry, they will be orthographical
	// variants of the same word, either using variations in okurigana, or
	// alternative and equivalent kanji. Common "mis-spellings" may be
	// included, provided they are associated with appropriate information
	// fields. Synonyms are not included; they may be indicated in the
	// cross-reference field associated with the sense element.
	Kanji []JmnedictKanji `xml:"k_ele"`

	// The reading element (<r_ele>) typically contains the valid readings
	// of the word(s) in the kanji element using modern kanadzukai.
	// Where there are multiple reading elements, they will typically be
	// alternative readings of the kanji element. In the absence of a
	// kanji element, i.e. in the case of a word or phrase written
	// entirely in kana, these elements will define the entry.
	Readings []JmnedictReading `xml:"r_ele"`

	// The trans element (<trans>) will record the translational equivalent
	// of the Japanese name, plus other related information.
	Translations []JmnedictTranslation `xml:"trans"`
}

type JmnedictKanji ¶

// JmnedictKanji is a single kanji element (<k_ele>) of a JMnedict entry:
// the written name plus its orthography and priority codes.
type JmnedictKanji struct {
	// This element (<keb>) will contain an entity name in Japanese
	// which is written using at least one non-kana character (usually
	// kanji, but can be other characters). The valid
	// characters are kanji, kana, related characters such as chouon and
	// kurikaeshi, and in exceptional cases, letters from other alphabets.
	Expression string `xml:"keb"`

	// This is a coded information field (<ke_inf>) related specifically to
	// the orthography of the keb, and will typically indicate some unusual
	// aspect, such as okurigana irregularity.
	Information []string `xml:"ke_inf"`

	// This (<ke_pri>) and the equivalent re_pri field are provided to record
	// information about the relative priority of the entry, and are for
	// use either by applications which want to concentrate on entries of
	// a particular priority, or to generate subset files. The reason
	// both the kanji and reading elements are tagged is because on
	// occasions a priority is only associated with a particular
	// kanji/reading pair.
	Priorities []string `xml:"ke_pri"`
}

type JmnedictReading ¶

// JmnedictReading is a single reading element (<r_ele>) of a JMnedict
// entry: the kana reading plus its restriction, information and priority
// fields.
type JmnedictReading struct {
	// This element (<reb>) content is restricted to kana and related
	// characters such as chouon and kurikaeshi. Kana usage will be
	// consistent between the keb and reb elements; e.g. if the keb
	// contains katakana, so too will the reb.
	Reading string `xml:"reb"`

	// This element (<re_restr>) is used to indicate when the reading only
	// applies to a subset of the keb elements in the entry. In its absence,
	// all readings apply to all kanji elements. The contents of this element
	// must exactly match those of one of the keb elements.
	Restrictions []string `xml:"re_restr"`

	// General coded information (<re_inf>) pertaining to the specific
	// reading. Typically it will be used to indicate some unusual aspect of
	// the reading.
	Information []string `xml:"re_inf"`

	// Priority codes (<re_pri>). See the comment on ke_pri
	// (JmnedictKanji.Priorities).
	Priorities []string `xml:"re_pri"`
}

type JmnedictTranslation ¶

// JmnedictTranslation is a single translation element of a JMnedict entry:
// the name-type codes, cross-references and translated forms of the name.
type JmnedictTranslation struct {
	// The type of name (<name_type>), recorded in the appropriate entity
	// codes.
	NameTypes []string `xml:"name_type"`

	// This element (<xref>) is used to indicate a cross-reference to another
	// entry with a similar or related meaning or sense. The content of
	// this element is typically a keb or reb element in another entry. In some
	// cases a keb will be followed by a reb and/or a sense number to provide
	// a precise target for the cross-reference. Where this happens, a JIS
	// "centre-dot" (0x2126) is placed between the components of the
	// cross-reference.
	References []string `xml:"xref"`

	// The actual translations of the name (<trans_det>), usually as a
	// transcription into the target language.
	Translations []string `xml:"trans_det"`

	// The xml:lang attribute defines the target language of the
	// translated name. It will be coded using the three-letter language
	// code from the ISO 639-2 standard. When absent, the value "eng"
	// (i.e. English) is the default value. The bibliographic (B) codes
	// are used.
	// NOTE(review): this tag reads "lang" from the element this struct is
	// decoded from, not from the individual trans_det children — confirm
	// against the JMnedict DTD which element actually carries xml:lang.
	Language *string `xml:"lang,attr"`
}

type Kanjidic ¶

// Kanjidic is the container for a decoded KANJIDIC (kanji) document. It is
// the value returned by LoadKanjidic.
type Kanjidic struct {
	// The single header element will contain identification information
	// about the version of the file
	Header KanjidicHeader `xml:"header"`

	// Characters holds one item per <character> element.
	Characters []KanjidicCharacter `xml:"character"`
}

func LoadKanjidic ¶

func LoadKanjidic(reader io.Reader) (Kanjidic, error)

type KanjidicCharacter ¶

// KanjidicCharacter is a single <character> element: one kanji together
// with its codepoints, radicals, miscellaneous data, dictionary references,
// query codes and readings/meanings.
type KanjidicCharacter struct {
	// The character itself in UTF8 coding.
	Literal string `xml:"literal"`

	// The codepoint element states the code of the character in the various
	// character set standards. Each item is one <cp_value> nested inside
	// <codepoint>.
	Codepoint []KanjidicCodepoint `xml:"codepoint>cp_value"`

	// The radical number, in the range 1 to 214. The particular
	// classification type is stated in the rad_type attribute. Each item
	// is one <rad_value> nested inside <radical>.
	Radical []KanjidicRadical `xml:"radical>rad_value"`

	// Miscellaneous information about the character (<misc>); see
	// KanjidicMisc for the individual fields.
	Misc KanjidicMisc `xml:"misc"`

	// This element contains the index numbers and similar unstructured
	// information such as page numbers in a number of published dictionaries,
	// and instructional books on kanji. Each item is one <dic_ref> nested
	// inside <dic_number>.
	DictionaryNumbers []KanjidicDicNumber `xml:"dic_number>dic_ref"`

	// These codes contain information relating to the glyph, and can be used
	// for finding a required kanji. The type of code is defined by the
	// qc_type attribute. Each item is one <q_code> nested inside
	// <query_code>.
	QueryCode []KanjidicQueryCode `xml:"query_code>q_code"`

	// The readings for the kanji in several languages, and the meanings, also
	// in several languages. The readings and meanings are grouped to enable
	// the handling of the situation where the meaning is differentiated by
	// reading. [T1]
	// The field is a pointer; it stays nil when the character has no
	// <reading_meaning> element.
	ReadingMeaning *KanjidicReadingMeaning `xml:"reading_meaning"`
}

type KanjidicCodepoint ¶

// KanjidicCodepoint is one codepoint value of a character together with
// the coding standard it belongs to.
type KanjidicCodepoint struct {
	// The cp_value contains the codepoint of the character in a particular
	// standard. The standard will be identified in the cp_type attribute.
	Value string `xml:",chardata"`

	// The cp_type attribute states the coding standard applying to the
	// element. The values assigned so far are:
	//   jis208 - JIS X 0208-1997 - kuten coding (nn-nn)
	//   jis212 - JIS X 0212-1990 - kuten coding (nn-nn)
	//   jis213 - JIS X 0213-2000 - kuten coding (p-nn-nn)
	//   ucs - Unicode 4.0 - hex coding (4 or 5 hexadecimal digits)
	Type string `xml:"cp_type,attr"`
}

type KanjidicDicNumber ¶

// KanjidicDicNumber is one dictionary reference (<dic_ref>) for a
// character: the index value plus the reference work it applies to.
type KanjidicDicNumber struct {
	// The index number or similar reference (the element's character data).
	Value string `xml:",chardata"`

	// The dr_type defines the dictionary or reference book, etc. to which
	// dic_ref element applies. The initial allocation is:
	//   nelson_c - "Modern Reader's Japanese-English Character Dictionary",
	//     edited by Andrew Nelson (now published as the "Classic"
	//     Nelson).
	//   nelson_n - "The New Nelson Japanese-English Character Dictionary",
	//     edited by John Haig.
	//   halpern_njecd - "New Japanese-English Character Dictionary",
	//     edited by Jack Halpern.
	//   halpern_kkd - "Kodansha Kanji Dictionary", (2nd Ed. of the NJECD)
	//     edited by Jack Halpern.
	//   halpern_kkld - "Kanji Learners Dictionary" (Kodansha) edited by
	//     Jack Halpern.
	//   halpern_kkld_2ed - "Kanji Learners Dictionary" (Kodansha), 2nd edition
	//     (2013) edited by Jack Halpern.
	//   heisig - "Remembering The Kanji" by James Heisig.
	//   heisig6 - "Remembering The Kanji, Sixth Ed." by James Heisig.
	//   gakken - "A New Dictionary of Kanji Usage" (Gakken)
	//   oneill_names - "Japanese Names", by P.G. O'Neill.
	//   oneill_kk - "Essential Kanji" by P.G. O'Neill.
	//   moro - "Daikanwajiten" compiled by Morohashi. For some kanji two
	//     additional attributes are used: m_vol: the volume of the
	//     dictionary in which the kanji is found, and m_page: the page
	//     number in the volume.
	//   henshall - "A Guide To Remembering Japanese Characters" by
	//     Kenneth G. Henshall.
	//   sh_kk - "Kanji and Kana" by Spahn and Hadamitzky.
	//   sh_kk2 - "Kanji and Kana" by Spahn and Hadamitzky (2011 edition).
	//   sakade - "A Guide To Reading and Writing Japanese" edited by
	//     Florence Sakade.
	//   jf_cards - Japanese Kanji Flashcards, by Max Hodges and
	//     Tomoko Okazaki. (Series 1)
	//   henshall3 - "A Guide To Reading and Writing Japanese" 3rd
	//     edition, edited by Henshall, Seeley and De Groot.
	//   tutt_cards - Tuttle Kanji Cards, compiled by Alexander Kask.
	//   crowley - "The Kanji Way to Japanese Language Power" by
	//     Dale Crowley.
	//   kanji_in_context - "Kanji in Context" by Nishiguchi and Kono.
	//   busy_people - "Japanese For Busy People" vols I-III, published
	//     by the AJLT. The codes are the volume.chapter.
	//   kodansha_compact - the "Kodansha Compact Kanji Guide".
	//   maniette - codes from Yves Maniette's "Les Kanjis dans la tete"
	//     French adaptation of Heisig.
	Type string `xml:"dr_type,attr"`

	// The m_vol attribute. See above under "moro".
	Volume string `xml:"m_vol,attr"`

	// The m_page attribute. See above under "moro".
	Page string `xml:"m_page,attr"`
}

type KanjidicHeader ¶

// KanjidicHeader carries the version and creation-date information found
// in the <header> element of a KANJIDIC file. All values are kept as the
// raw strings from the file.
type KanjidicHeader struct {
	// This field denotes the version of kanjidic2 structure, as more
	// than one version may exist.
	FileVersion string `xml:"file_version"`

	// The version of the file, in the format YYYY-NN, where NN will be
	// a number starting with 01 for the first version released in a
	// calendar year, then increasing for each version in that year.
	DatabaseVersion string `xml:"database_version"`

	// The date the file was created in international format (YYYY-MM-DD).
	DateOfCreation string `xml:"date_of_creation"`
}

type KanjidicMeaning ¶

// KanjidicMeaning is one meaning of a kanji together with the optional
// language it is expressed in.
type KanjidicMeaning struct {
	// The meaning associated with the kanji (the element's character data).
	Meaning string `xml:",chardata"`

	// The m_lang attribute defines the target language of the meaning. It
	// will be coded using the two-letter language code from the ISO 639-1
	// standard. When absent, the value "en" (i.e. English) is implied. [{}]
	// The field is a pointer; it stays nil when the attribute is absent.
	Language *string `xml:"m_lang,attr"`
}

type KanjidicMisc ¶

// KanjidicMisc groups the miscellaneous data of a character (<misc>):
// grade, stroke counts, variants, frequency, radical names and JLPT level.
// Optional single-valued elements are pointers and stay nil when absent;
// all values are kept as the raw strings from the file.
type KanjidicMisc struct {
	// The kanji grade level. 1 through 6 indicates a Kyouiku kanji
	// and the grade in which the kanji is taught in Japanese schools.
	// 8 indicates it is one of the remaining Jouyou Kanji to be learned
	// in junior high school, and 9 or 10 indicates it is a Jinmeiyou (for use
	// in names) kanji. [G]
	Grade *string `xml:"grade"`

	// The stroke count of the kanji, including the radical. If more than
	// one, the first is considered the accepted count, while subsequent ones
	// are common miscounts. (See Appendix E. of the KANJIDIC documentation
	// for some of the rules applied when counting strokes in some of the
	// radicals.) [S]
	StrokeCounts []string `xml:"stroke_count"`

	// Either a cross-reference code to another kanji, usually regarded as a
	// variant, or an alternative indexing code for the current kanji.
	// The type of variant is given in the var_type attribute.
	Variants []KanjidicVariant `xml:"variant"`

	// A frequency-of-use ranking. The 2,500 most-used characters have a
	// ranking; those characters that lack this field are not ranked. The
	// frequency is a number from 1 to 2,500 that expresses the relative
	// frequency of occurrence of a character in modern Japanese. This is
	// based on a survey in newspapers, so it is biased towards kanji
	// used in newspaper articles. The discrimination between the less
	// frequently used kanji is not strong. (Actually there are 2,501
	// kanji ranked as there was a tie.)
	Frequency *string `xml:"freq"`

	// When the kanji is itself a radical and has a name, this element
	// contains the name (in hiragana.) [T2]
	RadicalName []string `xml:"rad_name"`

	// The (former) Japanese Language Proficiency test level for this kanji.
	// Values range from 1 (most advanced) to 4 (most elementary). This field
	// does not appear for kanji that were not required for any JLPT level.
	// Note that the JLPT test levels changed in 2010, with a new 5-level
	// system (N1 to N5) being introduced. No official kanji lists are
	// available for the new levels. The new levels are regarded as
	// being similar to the old levels except that the old level 2 is
	// now divided between N2 and N3.
	JlptLevel *string `xml:"jlpt"`
}

type KanjidicQueryCode ¶

// KanjidicQueryCode is one query code (<q_code>) of a character: the code
// value, the kind of code, and an optional misclassification marker.
type KanjidicQueryCode struct {
	// The code itself (the element's character data).
	Value string `xml:",chardata"`

	// The qc_type attribute gives the type of query code. Values described
	// here (NOTE(review): this list looks incomplete — confirm the full set
	// against the KANJIDIC2 DTD):
	//   deroo - the codes developed by the late Father Joseph De Roo, and
	//     published in his book "2001 Kanji" (Bonjinsha). Fr De Roo
	//     gave his permission for these codes to be included. [DR]
	//   misclass - a possible misclassification of the kanji according
	//     to one of the code types. (See the "Z" codes in the KANJIDIC
	//     documentation for more details.)
	Type string `xml:"qc_type,attr"`

	// The values of the skip_misclass attribute indicate the type of
	// misclassification:
	// - posn - a mistake in the division of the kanji
	// - stroke_count - a mistake in the number of strokes
	// - stroke_and_posn - mistakes in both division and strokes
	// - stroke_diff - ambiguous stroke counts depending on glyph
	Misclassification string `xml:"skip_misclass,attr"`
}

type KanjidicRadical ¶

// KanjidicRadical is one radical value (<rad_value>) of a character
// together with the classification scheme it belongs to.
type KanjidicRadical struct {
	// The radical number, kept as the raw string from the file.
	Value string `xml:",chardata"`

	// The rad_type attribute states the type of radical classification.
	//   classical - as recorded in the KangXi Zidian.
	//   nelson_c - as used in the Nelson "Modern Japanese-English
	//     Character Dictionary" (i.e. the Classic, not the New Nelson).
	//     This will only be used where Nelson reclassified the kanji.
	Type string `xml:"rad_type,attr"`
}

type KanjidicReading ¶

// KanjidicReading is one reading of a kanji: the reading text plus the
// attributes describing what kind of reading it is. Optional attributes are
// pointers so that "absent" (nil) can be told apart from "present but empty".
type KanjidicReading struct {
	// The reading itself (the element's character data).
	Value string `xml:",chardata"`

	// The r_type attribute defines the type of reading in the reading
	// element. The current values are:
	//   pinyin - the modern PinYin romanization of the Chinese reading
	//     of the kanji. The tones are represented by a concluding
	//     digit. [Y]
	//   korean_r - the romanized form of the Korean reading(s) of the
	//     kanji. The readings are in the (Republic of Korea) Ministry
	//     of Education style of romanization. [W]
	//   korean_h - the Korean reading(s) of the kanji in hangul.
	//   ja_on - the "on" Japanese reading of the kanji, in katakana.
	//     Another attribute r_status, if present, will indicate with
	//     a value of "jy" whether the reading is approved for a
	//     "Jouyou kanji".
	//     A further attribute on_type, if present, will indicate with
	//     a value of kan, go, tou or kan'you the type of on-reading.
	//   ja_kun - the "kun" Japanese reading of the kanji, usually in
	//     hiragana.
	//     Where relevant the okurigana is also included separated by a
	//     ".". Readings associated with prefixes and suffixes are
	//     marked with a "-". A second attribute r_status, if present,
	//     will indicate with a value of "jy" whether the reading is
	//     approved for a "Jouyou kanji".
	Type string `xml:"r_type,attr"`

	// The on_type attribute; see under ja_on above.
	//
	// The ",attr" option is required: without it encoding/xml looks for an
	// <on_type> child element and this field is never populated.
	OnType *string `xml:"on_type,attr"`

	// The r_status attribute; see under ja_on and ja_kun above.
	//
	// The ",attr" option is required here for the same reason as on OnType.
	JouyouStatus *string `xml:"r_status,attr"`
}

type KanjidicReadingMeaning ¶

// KanjidicReadingMeaning collects the readings, meanings and nanori of a
// character. Readings and meanings are gathered from the <reading> and
// <meaning> elements nested inside <rmgroup>.
type KanjidicReadingMeaning struct {
	// The reading element contains the reading or pronunciation
	// of the kanji.
	Readings []KanjidicReading `xml:"rmgroup>reading"`

	// The meaning associated with the kanji.
	Meanings []KanjidicMeaning `xml:"rmgroup>meaning"`

	// Japanese readings that are now only associated with names (<nanori>).
	Nanori []string `xml:"nanori"`
}

type KanjidicVariant ¶

// KanjidicVariant is one <variant> element of a character: a variant or
// alternative-index code together with the type of that code.
type KanjidicVariant struct {
	// The variant code itself (the element's character data).
	Value string `xml:",chardata"`

	// The var_type attribute indicates the type of variant code. The current
	// values are:
	//   jis208 - in JIS X 0208 - kuten coding
	//   jis212 - in JIS X 0212 - kuten coding
	//   jis213 - in JIS X 0213 - kuten coding
	//     (most of the above relate to "shinjitai/kyuujitai"
	//     alternative character glyphs)
	//   deroo - De Roo number - numeric
	//   njecd - Halpern NJECD index number - numeric
	//   s_h - The Kanji Dictionary (Spahn & Hadamitzky) - descriptor
	//   nelson_c - "Classic" Nelson - numeric
	//   oneill - Japanese Names (O'Neill) - numeric
	//   ucs - Unicode codepoint - hex
	//
	// The ",attr" option is required: without it encoding/xml looks for a
	// <var_type> child element and this field is always empty.
	Type string `xml:"var_type,attr"`
}

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL