verifier

package
v0.3.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 24, 2021 License: MIT Imports: 2 Imported by: 27

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type CurationLevel

// CurationLevel is an int-based enumeration; its values are the constants
// NotCurated, AutoCurated and Curated.
type CurationLevel int

CurationLevel tells whether a matched result was returned by at least one DataSource in one of the following categories.

// Possible values of CurationLevel. They are assigned with iota, so
// NotCurated is 0, AutoCurated is 1 and Curated is 2.
const (
	// NotCurated means that all DataSources where the name-string was matched
	// are not curated sufficiently. It is the zero value of CurationLevel.
	NotCurated CurationLevel = iota

	// AutoCurated means that at least one of the returned DataSources invested
	// significantly in curating its data by scripts.
	AutoCurated

	// Curated means that at least one DataSource is marked as sufficiently
	// curated. It does not mean, though, that the particular match was
	// manually checked.
	Curated
)

func (CurationLevel) MarshalJSON

func (cl CurationLevel) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface and converts a CurationLevel into a string.

func (CurationLevel) String

func (cl CurationLevel) String() string

func (*CurationLevel) UnmarshalJSON

func (cl *CurationLevel) UnmarshalJSON(bs []byte) error

UnmarshalJSON implements the json.Unmarshaler interface and converts a string into a CurationLevel.

type DataSource

// DataSource provides metadata for an externally collected data-set.
type DataSource struct {
	// ID is a DataSource ID.
	ID int `json:"id"`

	// UUID generated by GlobalNames and associated with the DataSource.
	UUID string `json:"uuid,omitempty"`

	// Title is a full title of a DataSource.
	Title string `json:"title"`

	// TitleShort is a shortened/abbreviated title of a DataSource.
	TitleShort string `json:"titleShort"`

	// Version of the data-set for a DataSource.
	Version string `json:"version,omitempty"`

	// RevisionDate of a data-set from a data-provider.
	// It follows the format 'YYYY-MM-DD' || 'YYYY-MM' || 'YYYY'.
	// This data comes from the information given by the data-provider,
	// while the UpdatedAt field is the date of harvesting of the
	// resource. Note that the JSON key is "releaseDate".
	RevisionDate string `json:"releaseDate,omitempty"`

	// DOI of a DataSource.
	DOI string `json:"doi,omitempty"`

	// Citation representing a DataSource.
	Citation string `json:"citation,omitempty"`

	// Authors associated with the DataSource.
	Authors string `json:"authors,omitempty"`

	// Description of the DataSource.
	Description string `json:"description,omitempty"`

	// WebsiteURL is a homepage of a DataSource. Note that the JSON key is
	// "homeURL".
	WebsiteURL string `json:"homeURL,omitempty"`

	// OutlinkURL is a template for generating outlink URLs. Verification
	// output will substitute '{}' with an OutlinkID. The field is excluded
	// from JSON output.
	OutlinkURL string `json:"-"`

	// IsOutlinkReady is true for data-sources that have enough data and
	// metadata to be recommended for outlinking by third-party applications
	// (be included into preferred data-sources). When false, it does not
	// mean that the original resource is not valuable, it means that
	// its representation at gnames is not complete/recent enough.
	IsOutlinkReady bool `json:"isOutlinkReady,omitempty"`

	// Curation determines how much manual or programmatic work is put
	// into assuring the quality of the data.
	Curation CurationLevel `json:"curation"`

	// RecordCount tells how many entries are in a DataSource.
	RecordCount int `json:"recordCount"`

	// UpdatedAt is the last import date (YYYY-MM-DD). In contrast, the
	// RevisionDate field indicates when the resource was
	// updated according to its data-provider.
	UpdatedAt string `json:"updatedAt"`
}

DataSource provides metadata for an externally collected data-set.

type MatchTypeValue

// MatchTypeValue is an int-based enumeration; its values are the constants
// NoMatch, Exact, Fuzzy, PartialExact and PartialFuzzy.
type MatchTypeValue int

MatchTypeValue describes how a name-string matched a name in gnames database.

// Possible values of MatchTypeValue. They are assigned with iota, starting
// with NoMatch as 0.
const (
	// NoMatch means that verification failed. It is the zero value of
	// MatchTypeValue.
	NoMatch MatchTypeValue = iota

	// Exact means either canonical form, or the whole name-string matched
	// perfectly.
	Exact

	// Fuzzy means that matches were not exact due to similarity of name-strings,
	// OCR or typing errors. Take these results with more suspicion than
	// Exact matches. Fuzzy match is never done on uninomials due to the
	// high rate of false positives.
	Fuzzy

	// PartialExact is used if GNames failed to match the full name-string. In
	// that case the match happened by removing either middle species epithets,
	// or by chopping the 'tail' words of the input name-string canonical form.
	PartialExact

	// PartialFuzzy is the same as PartialExact, but also the match was not
	// exact. We never do fuzzy matches for uninomials, due to the high rate of
	// false positives.
	PartialFuzzy
)

func NewMatchType

func NewMatchType(t string) MatchTypeValue

NewMatchType takes a string and converts it into a MatchTypeValue. If the string is unknown, it returns NoMatch.

func (MatchTypeValue) MarshalJSON

func (mt MatchTypeValue) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface and converts a MatchTypeValue into a string.

func (MatchTypeValue) String

func (mt MatchTypeValue) String() string

String implements the fmt.Stringer interface and returns a string representation of a MatchTypeValue. The returned string can be converted back to a MatchTypeValue via the NewMatchType function.

func (*MatchTypeValue) UnmarshalJSON

func (mt *MatchTypeValue) UnmarshalJSON(bs []byte) error

UnmarshalJSON implements the json.Unmarshaler interface and converts a string into a MatchTypeValue.

type ResultData

// ResultData is the returned data of the "best" or "preferred" result of
// name verification.
type ResultData struct {
	// DataSourceID is the ID of a matched DataSource.
	DataSourceID int `json:"dataSourceId"`

	// DataSourceTitleShort is a shortened/abbreviated title of the data source.
	DataSourceTitleShort string `json:"dataSourceTitleShort"`

	// Curation of the data source.
	Curation CurationLevel `json:"curation"`

	// RecordID from a data source. We try our best to return an ID that
	// corresponds to dwc:taxonID of a DataSource. If such an ID is not provided,
	// this ID will be auto-generated. Auto-generated IDs will have 'gn_' prefix.
	RecordID string `json:"recordId"`

	// GlobalID that is exposed globally by a DataSource. Such IDs are usually
	// self-resolved, like for example LSID, pURL, DOI etc.
	GlobalID string `json:"globalId,omitempty"`

	// LocalID used by a DataSource internally. If an Outlink field is provided,
	// LocalID serves as a 'dynamic' component of the URL.
	LocalID string `json:"localId,omitempty"`

	// Outlink to the record in the DataSource. It consists of a 'stable'
	// URL and an appended 'dynamic' LocalID.
	Outlink string `json:"outlink,omitempty"`

	// EntryDate is a timestamp created on entry of the data.
	EntryDate string `json:"entryDate"`

	// Score determines how well the match did work. It is used to determine
	// best match overall, and best match for every data-source. The field is
	// excluded from JSON output.
	Score uint32 `json:"-"`

	// ParsingQuality determines how well gnparser was able to break the
	// name-string to its components. 0 - no parse, 1 - clean parse,
	// 2 - some problems, 3 - significant problems. The field is excluded
	// from JSON output.
	ParsingQuality int `json:"-"`

	// MatchedName is a name-string from the DataSource that was matched
	// by GNames algorithm.
	MatchedName string `json:"matchedName"`

	// MatchedCardinality is the cardinality of returned name:
	// 0 - No match, virus or hybrid formula,
	// 1 - Uninomial, 2 - Binomial, 3 - trinomial etc.
	MatchedCardinality int `json:"matchedCardinality"`

	// MatchedCanonicalSimple is a simplified canonical form without ranks for
	// names lower than species, and with omitted hybrid signs for named hybrids.
	// Quite often simple canonical is the same as full canonical. Hybrid signs
	// are preserved for hybrid formulas.
	MatchedCanonicalSimple string `json:"matchedCanonicalSimple,omitempty"`

	// MatchedCanonicalFull is a canonical form that preserves hybrid signs
	// and infraspecific ranks.
	MatchedCanonicalFull string `json:"matchedCanonicalFull,omitempty"`

	// MatchedAuthors is a list of authors mentioned in the name. The field is
	// excluded from JSON output.
	MatchedAuthors []string `json:"-"`

	// MatchedYear is a year mentioned in the name. Multiple years or
	// approximate years are ignored. The field is excluded from JSON output.
	MatchedYear int `json:"-"`

	// CurrentRecordID is the id of currently accepted name given by
	// the data-source.
	CurrentRecordID string `json:"currentRecordId"`

	// CurrentName is a currently accepted name (it is only provided by
	// DataSources with taxonomic data).
	CurrentName string `json:"currentName"`

	// CurrentCardinality is a cardinality of the accepted name.
	// It might differ from the matched name cardinality.
	CurrentCardinality int `json:"currentCardinality"`

	// CurrentCanonicalSimple is a canonical form for the currently accepted name.
	CurrentCanonicalSimple string `json:"currentCanonicalSimple"`

	// CurrentCanonicalFull is a full version of canonical form for the
	// currently accepted name.
	CurrentCanonicalFull string `json:"currentCanonicalFull"`

	// IsSynonym is true if there is an indication in the DataSource that the
	// name is not a currently accepted name for one or another reason.
	IsSynonym bool `json:"isSynonym"`

	// ClassificationPath to the name (if provided by the DataSource).
	// Classification path consists of a hierarchy of name-strings.
	ClassificationPath string `json:"classificationPath,omitempty"`

	// ClassificationRanks of the classification path. They follow the
	// same order as the classification path.
	ClassificationRanks string `json:"classificationRanks,omitempty"`

	// ClassificationIDs of the name-strings. They always correspond to
	// the "id" field.
	ClassificationIDs string `json:"classificationIds,omitempty"`

	// EditDistance is a Levenshtein edit distance between canonical form of the
	// input name-string and the matched canonical form. If match type is
	// "EXACT", edit-distance will be 0.
	EditDistance int `json:"editDistance"`

	// StemEditDistance is a Levenshtein edit distance after removing suffixes
	// from specific epithets from canonical forms.
	StemEditDistance int `json:"stemEditDistance"`

	// MatchType describes what kind of a match happened to a name-string.
	MatchType MatchTypeValue `json:"matchType"`

	// Vernaculars are vernacular names that correspond to the matched name.
	// (Will be implemented later.)
	Vernaculars []Vernacular `json:"vernaculars,omitempty"`
}

ResultData is the returned data of the "best" or "preferred" result of name verification.

type Verification

// Verification is a result returned by Verify method.
type Verification struct {
	// InputID is a UUIDv5 generated out of the Input string.
	InputID string `json:"inputId"`

	// Input is a verified name-string.
	Input string `json:"input"`

	// InputCapitalized is true if there was a request to capitalize the input.
	InputCapitalized bool `json:"inputCapitalized,omitempty"`

	// MatchType is the best available match.
	MatchType MatchTypeValue `json:"matchType"`

	// BestResult is the best result according to GNames scoring.
	BestResult *ResultData `json:"bestResult,omitempty"`

	// PreferredResults contain all detected matches from preferred data sources
	// provided by user.
	PreferredResults []*ResultData `json:"preferredResults,omitempty"`

	// DataSourcesNum is a number of data sources that matched an
	// input name-string.
	DataSourcesNum int `json:"dataSourcesNum"`

	// Curation estimates reliability of matched data sources: whether
	// matches are returned by at least one manually curated data source, or by
	// an automatically curated data source, or only by sources that are not
	// significantly manually curated.
	Curation CurationLevel `json:"curation"`

	// Error provides an error message, if any. If error is not empty, the match
	// failed because of a bug in the service.
	Error string `json:"error,omitempty"`
}

Verification is a result returned by Verify method.

type VerifyParams

// VerifyParams are options/parameters for the Verify method.
type VerifyParams struct {
	// NameStrings is a list of name-strings to verify.
	NameStrings []string `json:"nameStrings"`

	// PreferredSources contain DataSource IDs whose matches will be returned
	// besides the best result. See PreferredResults field in Verification.
	PreferredSources []int `json:"preferredSources"`

	// WithAllSources indicates that matches are returned from all sources. If
	// WithAllSources is true, PreferredSources setting is ignored.
	WithAllSources bool `json:"withAllSources"`

	// WithAllMatches indicates that all matches per data-source are returned,
	// sorted by score (instead of the best match per source). If WithAllSources
	// is also given, then all results from all sources are returned.
	WithAllMatches bool `json:"withAllMatches"`

	// WithVernaculars indicates if corresponding vernacular results will be
	// returned as well.
	WithVernaculars bool `json:"withVernaculars"`

	// WithCapitalization flag; when true, the first rune of lower-case
	// input name-strings will be capitalized if appropriate.
	WithCapitalization bool `json:"withCapitalization"`
}

VerifyParams are options/parameters for the Verify method.

type Vernacular

// Vernacular is a vernacular name.
type Vernacular struct {
	// Name is the vernacular name-string.
	Name string `json:"name"`

	// Language of the name, hopefully in ISO form.
	Language string `json:"language,omitempty"`

	// Locality is the geographic place(s) where the name is used.
	Locality string `json:"locality,omitempty"`
}

Vernacular name

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL