Documentation
¶
Overview ¶
Package config sets up defaults used by both the SF and roy tools. Config options can be overridden with build flags, e.g. for archivematica signatures.
Core siegfried defaults
Index ¶
- func ArcArcTypes() []string
- func ArcGzipTypes() []string
- func ArcTarTypes() []string
- func ArcWarcTypes() []string
- func ArcZipTypes() []string
- func ChangesURL() string
- func Checkpoint(i int64) bool
- func Choices() int
- func Clear() func() private
- func Conf() string
- func Container() string
- func ContainerBase() string
- func Cost() int
- func Debug() bool
- func Details(extra ...string) string
- func Distance() int
- func DoubleUp() bool
- func Droid() string
- func DroidBase() string
- func Exclude(ids []string) []string
- func ExcludeDoubles(puids, cont []string) []string
- func Extend() []string
- func ExtendC() []string
- func Fpr() string
- func GetWikidataNamespace() string
- func GetWikidataNoPRONOM() bool
- func GetWikidataRevisionHistoryLen() int
- func GetWikidataRevisionHistoryThreads() int
- func HarvestOptions() (string, time.Duration, time.Duration, *http.Transport)
- func HasExclude() bool
- func HasLimit() bool
- func Home() string
- func LOC() string
- func Limit(ids []string) []string
- func ListAllArcTypes() string
- func Local(base string) string
- func MIMEInfo() string
- func MIMEVersion() []string
- func Magic() []byte
- func MaxBOF() int
- func MaxEOF() int
- func Name() string
- func NoByte() bool
- func NoClass() bool
- func NoContainer() bool
- func NoEOF() bool
- func NoMIME() bool
- func NoName() bool
- func NoPRONOM() bool
- func NoPriority() bool
- func NoRIFF() bool
- func NoText() bool
- func NoXML() bool
- func Out() io.Writer
- func Range() int
- func Repetition() int
- func Reports() string
- func SetArchiveFilterPermissive(value string) []string
- func SetBOF(b int) func() private
- func SetChoices(i int) func() private
- func SetConf(s string)
- func SetContainer(c string) func() private
- func SetCost(i int) func() private
- func SetCustomWikibaseEndpoint(customEndpointURL string, customWikibaseURL string) error
- func SetCustomWikibaseQuery() error
- func SetDebug()
- func SetDetails(d string) func() private
- func SetDistance(i int) func() private
- func SetDoubleUp() func() private
- func SetDroid(d string) func() private
- func SetEOF(e int) func() private
- func SetExclude(l []string) func() private
- func SetExtend(l []string) func() private
- func SetExtendC(l []string) func() private
- func SetHarvestThrottle(d time.Duration)
- func SetHarvestTimeout(d time.Duration)
- func SetHarvestTransport(t *http.Transport)
- func SetHome(h string)
- func SetLOC(fdd string) func() private
- func SetLimit(l []string) func() private
- func SetMIMEInfo(mi string) func() private
- func SetMulti(m string) func() private
- func SetName(n string) func() private
- func SetNoByte() func() private
- func SetNoClass() func() private
- func SetNoContainer() func() private
- func SetNoEOF() func() private
- func SetNoMIME() func() private
- func SetNoName() func() private
- func SetNoPRONOM() func() private
- func SetNoRIFF() func() private
- func SetNoReports() func() private
- func SetNoText() func() private
- func SetNoXML() func() private
- func SetOut(o io.Writer)
- func SetProps(pronom string, bof string, eof string) error
- func SetRange(i int) func() private
- func SetRepetition(i int) func() private
- func SetSignature(s string)
- func SetSlow()
- func SetVerbose(v bool) func() private
- func SetWikibasePropsPath(propsPath string) func() private
- func SetWikibaseSparql(query string) func() private
- func SetWikibaseURL(baseURL string) (func() private, error)
- func SetWikidataDebug() func() private
- func SetWikidataDefinitions(definitions string)
- func SetWikidataEndpoint(endpoint string) (func() private, error)
- func SetWikidataLang(lang string)
- func SetWikidataNamespace() func() private
- func SetWikidataNoPRONOM() func() private
- func SetWikidataPRONOM() func() private
- func Signature() string
- func SignatureBase() string
- func Slow() bool
- func TextMIME() string
- func TextPuid() string
- func UpdateOptions() (string, time.Duration, *http.Transport)
- func UserAgent() string
- func Verbose() bool
- func Version() [3]int
- func WikibaseBOF() string
- func WikibaseEOF() string
- func WikibasePronom() string
- func WikibasePropsPath() string
- func WikibaseSparqlFile() string
- func WikidataDebug() bool
- func WikidataDefinitionsFile() string
- func WikidataDefinitionsPath() string
- func WikidataEndpoint() string
- func WikidataFileMode() os.FileMode
- func WikidataHome() string
- func WikidataLang() string
- func WikidataSPARQL() string
- func WikidataSPARQLRevisionParam() string
- func WikidataWikibaseURL() string
- func ZipLOC() string
- func ZipMIME() string
- func ZipPuid() string
- type Archive
- type Multi
- type Option
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ArcArcTypes ¶ added in v1.9.0
func ArcArcTypes() []string
ArcArcTypes returns a string array with all Arc identifiers Siegfried can match and decompress.
func ArcGzipTypes ¶ added in v1.9.0
func ArcGzipTypes() []string
ArcGzipTypes returns a string array with all Gzip identifiers Siegfried can match and decompress.
func ArcTarTypes ¶ added in v1.9.0
func ArcTarTypes() []string
ArcTarTypes returns a string array with all Tar identifiers Siegfried can match and decompress.
func ArcWarcTypes ¶ added in v1.9.0
func ArcWarcTypes() []string
ArcWarcTypes returns a string array with all Warc identifiers Siegfried can match and decompress.
func ArcZipTypes ¶ added in v1.9.0
func ArcZipTypes() []string
ArcZipTypes returns a string array with all Zip identifiers Siegfried can match and decompress.
func Checkpoint ¶
Checkpoint reports the offset at which slow logging should trigger.
func Choices ¶
func Choices() int
Choices is a bytematcher setting. It controls the number of tolerable strings produced by processing signature segments. E.g. if a signature has two adjoining frames ("PDF") and ("1.1" OR "1.2"), it can be processed into two search strings: "PDF1.1" and "PDF1.2". A low number of choices means a smaller Aho Corasick search tree and more patterns to follow-up. A large number of choices means a larger Aho Corasick search tree and more signatures immediately satisfied without follow-up pattern matching.
func Clear ¶ added in v1.7.6
func Clear() func() private
Clear clears loc and mimeinfo details to avoid pollution when creating multiple identifiers in same session
func Conf ¶ added in v1.7.9
func Conf() string
Conf returns the path to the siegfried configuration file.
func Container ¶
func Container() string
Container returns the location of the DROID container signature file. If not set, infers the latest file.
func ContainerBase ¶
func ContainerBase() string
ContainerBase returns the base filename of the DROID container signature file. If not set, infers the latest file.
func Cost ¶ added in v1.8.0
func Cost() int
Cost is a bytematcher setting. It controls the number of tolerable matches in a worst case scenario for a signature segment. If this cost is exceeded, then segmentation won't happen and the choices/range/distance preferences will be ignored.
func Details ¶
Details returns a description of the identifier. This is auto-populated if not set directly. Extra information from signatures such as date last modified can be given to this function.
func Distance ¶
func Distance() int
Distance is a bytematcher setting. It controls the absolute widths at which segments in signatures are split. E.g. if segments are separated by a minimum of 50 and maximum of 100 bytes, the distance is 100. A short distance means a smaller Aho Corasick search tree and more patterns to follow-up. A long distance means a larger Aho Corasick search tree and more signatures immediately satisfied without follow-up pattern matching.
func DoubleUp ¶
func DoubleUp() bool
DoubleUp reports whether the doubleup flag has been set. This will cause byte signatures to be built for formats where container signatures are also provided.
func Droid ¶
func Droid() string
Droid returns the location of the DROID signature file. If not set, infers the latest file.
func DroidBase ¶
func DroidBase() string
DroidBase returns the base filename of the DROID signature file. If not set, infers the latest file.
func Exclude ¶
Exclude takes a slice of puids and omits those that are also in the identifier.exclude slice.
func ExcludeDoubles ¶
ExcludeDoubles takes a slice of puids and a slice of container puids and excludes those that are in the container slice, if nodoubles is set.
func Extend ¶
func Extend() []string
Extend reports whether a set of signature extensions has been provided.
func ExtendC ¶
func ExtendC() []string
ExtendC reports whether a set of container signature extensions has been provided.
func Fpr ¶
func Fpr() string
Fpr reports whether sf is being run in -fpr (Archivematica format policy registry) mode.
func GetWikidataNamespace ¶ added in v1.9.0
func GetWikidataNamespace() string
GetWikidataNamespace will return the Wikidata namespace field to the caller.
func GetWikidataNoPRONOM ¶ added in v1.9.0
func GetWikidataNoPRONOM() bool
GetWikidataNoPRONOM will tell the caller whether or not to use native PRONOM patterns inside the identifier.
func GetWikidataRevisionHistoryLen ¶ added in v1.9.2
func GetWikidataRevisionHistoryLen() int
GetWikidataRevisionHistoryLen will return the length of the Wikibase history to retrieve to the caller.
func GetWikidataRevisionHistoryThreads ¶ added in v1.9.2
func GetWikidataRevisionHistoryThreads() int
GetWikidataRevisionHistoryThreads will return the number of threads to use to retrieve Wikibase history to the caller.
func HarvestOptions ¶
HarvestOptions reports the PRONOM URL, the harvest timeout and throttle durations, and the transport.
func HasExclude ¶
func HasExclude() bool
HasExclude reports whether an exclusion set of signatures has been provided.
func HasLimit ¶
func HasLimit() bool
HasLimit reports whether a limited set of signatures has been selected.
func Limit ¶
Limit takes a slice of puids and returns a new slice containing only those puids in the limit set.
func ListAllArcTypes ¶ added in v1.9.0
func ListAllArcTypes() string
ListAllArcTypes returns a list of archive file-format extensions that can be used to filter the files Siegfried will decompress to identify the contents of.
func MIMEInfo ¶
func MIMEInfo() string
MIMEInfo returns the location of the MIMEInfo signature file.
func MIMEVersion ¶ added in v1.7.7
func MIMEVersion() []string
func Magic ¶
func Magic() []byte
Magic returns the magic string encoded at the start of a siegfried signature file.
func NoByte ¶ added in v1.7.6
func NoByte() bool
NoByte reports whether byte signatures should be omitted.
func NoClass ¶ added in v1.10.0
func NoClass() bool
NoClass reports whether the noclass flag has been set. This will cause class to be omitted from format infos
func NoContainer ¶
func NoContainer() bool
NoContainer reports whether container signatures should be omitted.
func NoEOF ¶
func NoEOF() bool
NoEOF reports whether end of file segments of signatures should be trimmed.
func NoPriority ¶
func NoPriority() bool
NoPriority reports whether priorities between signatures should be omitted.
func Range ¶
func Range() int
Range is a bytematcher setting. It controls the relative widths at which segments in signatures are split. E.g. if segments are separated by a minimum of 50 and maximum of 100 bytes, the range is 50. A small range means a smaller Aho Corasick search tree and more patterns to follow-up. A large range means a larger Aho Corasick search tree and more signatures immediately satisfied without follow-up pattern matching.
func Repetition ¶ added in v1.8.0
func Repetition() int
Repetition is a bytematcher setting. It is used in combination with Cost to determine segmentation.
func SetArchiveFilterPermissive ¶ added in v1.9.0
SetArchiveFilterPermissive will take our comma separated list of archives we want to extract from the Siegfried command-line and use the values to construct a permissive filter. Anything not in the slice returned at the end of this function will not be extracted when -z flag is used.
func SetBOF ¶
func SetBOF(b int) func() private
SetBOF limits the number of bytes to scan from the beginning of file.
func SetChoices ¶
func SetChoices(i int) func() private
SetChoices sets the choices variable for the bytematcher.
func SetConf ¶ added in v1.7.9
func SetConf(s string)
SetConf sets the configuration filename or filepath.
func SetContainer ¶
func SetContainer(c string) func() private
SetContainer sets the name and/or location of the DROID container signature file. I.e. can provide a full path or a filename relative to the HOME directory.
func SetCost ¶ added in v1.8.0
func SetCost(i int) func() private
SetCost sets the cost variable for the bytematcher.
func SetCustomWikibaseEndpoint ¶ added in v1.9.2
SetCustomWikibaseEndpoint sets a custom Wikibase endpoint if provided by the caller.
func SetCustomWikibaseQuery ¶ added in v1.9.2
func SetCustomWikibaseQuery() error
SetCustomWikibaseQuery checks for a custom query file and then sets the configuration to point to that file if it finds one.
func SetDetails ¶
func SetDetails(d string) func() private
SetDetails sets the identifier's description. If not provided, this description is automatically generated based on options set.
func SetDistance ¶
func SetDistance(i int) func() private
SetDistance sets the distance variable for the bytematcher.
func SetDoubleUp ¶
func SetDoubleUp() func() private
SetDoubleUp causes byte signatures to be built for formats where container signatures are also provided.
func SetDroid ¶
func SetDroid(d string) func() private
SetDroid sets the name and/or location of the DROID signature file. I.e. can provide a full path or a filename relative to the HOME directory.
func SetEOF ¶
func SetEOF(e int) func() private
SetEOF limits the number of bytes to scan from the end of file.
func SetExclude ¶
func SetExclude(l []string) func() private
SetExclude excludes the provided signatures from those built.
func SetExtend ¶
func SetExtend(l []string) func() private
SetExtend adds extension signatures to the build.
func SetExtendC ¶
func SetExtendC(l []string) func() private
SetExtendC adds container extension signatures to the build.
func SetHarvestThrottle ¶
SetHarvestThrottle sets a throttle value for downloading DROID reports.
func SetHarvestTimeout ¶
SetHarvestTimeout sets a time limit on PRONOM harvesting.
func SetHarvestTransport ¶
SetHarvestTransport sets the PRONOM harvesting transport.
func SetHome ¶
func SetHome(h string)
SetHome sets the siegfried HOME location (e.g. /usr/home/siegfried).
func SetLimit ¶
func SetLimit(l []string) func() private
SetLimit limits the set of signatures built to the list provided.
func SetMIMEInfo ¶
func SetMIMEInfo(mi string) func() private
func SetMulti ¶
func SetMulti(m string) func() private
SetMulti defines how identifiers report multiple results.
func SetNoByte ¶ added in v1.7.6
func SetNoByte() func() private
SetNoByte will cause byte signatures to be omitted.
func SetNoClass ¶ added in v1.10.0
func SetNoClass() func() private
SetNoClass causes class to be omitted from the format info
func SetNoContainer ¶
func SetNoContainer() func() private
SetNoContainer will cause container signatures to be omitted.
func SetNoEOF ¶
func SetNoEOF() func() private
SetNoEOF will cause end of file segments to be trimmed from signatures.
func SetNoMIME ¶
func SetNoMIME() func() private
SetNoMIME will cause MIME signatures to be omitted.
func SetNoName ¶
func SetNoName() func() private
SetNoName will cause extension signatures to be omitted.
func SetNoPRONOM ¶
func SetNoPRONOM() func() private
func SetNoRIFF ¶
func SetNoRIFF() func() private
SetNoRIFF will cause RIFF FOURCC signatures to be omitted.
func SetNoReports ¶
func SetNoReports() func() private
SetNoReports instructs roy to build from the DROID signature file alone (and not from the PRONOM reports).
func SetNoText ¶
func SetNoText() func() private
SetNoText will cause text signatures to be omitted.
func SetProps ¶ added in v1.9.2
SetProps will set the three minimum properties needed to run Roy/SF with a custom Wikibase instance.
func SetRange ¶
func SetRange(i int) func() private
SetRange sets the range variable for the bytematcher.
func SetRepetition ¶ added in v1.8.0
func SetRepetition(i int) func() private
SetRepetition sets the repetition variable for the bytematcher.
func SetSignature ¶
func SetSignature(s string)
SetSignature sets the signature filename or filepath.
func SetVerbose ¶ added in v1.10.0
func SetVerbose(v bool) func() private
SetVerbose controls logging verbosity when building signatures
func SetWikibasePropsPath ¶ added in v1.9.2
func SetWikibasePropsPath(propsPath string) func() private
SetWikibasePropsPath allows the WikidataPropsPath to be overwritten, e.g. for testing, and if it becomes needed, in the primary Roy code base.
func SetWikibaseSparql ¶ added in v1.9.2
func SetWikibaseSparql(query string) func() private
SetWikibaseSparql sets the SPARQL placeholder for custom Wikibase queries.
func SetWikibaseURL ¶ added in v1.9.2
SetWikibaseURL allows the default value for the Wikibase URL to be overridden. The URL should be the one that enables permalinks to be returned from Wikibase, e.g. for Wikidata this URL needs to be:
func SetWikidataDebug ¶ added in v1.9.0
func SetWikidataDebug() func() private
SetWikidataDebug turns linting messages on when compiling the identifier
func SetWikidataDefinitions ¶ added in v1.9.0
func SetWikidataDefinitions(definitions string)
SetWikidataDefinitions is a setter to enable us to elect to use a different signature file name, e.g. as a setter during testing.
func SetWikidataEndpoint ¶ added in v1.9.0
SetWikidataEndpoint enables the use of another Wikibase instance if one is available. If there is an error with the URL then summary information will be returned to the caller and the default endpoint will be used.
func SetWikidataLang ¶ added in v1.9.0
func SetWikidataLang(lang string)
SetWikidataLang sets the language that we want to return results in from Wikidata. The default is en.
func SetWikidataNamespace ¶ added in v1.9.0
func SetWikidataNamespace() func() private
SetWikidataNamespace will set the Wikidata namespace. One reason this isn't set already is that Roy's idiom is to use it as a signal to say this identifier is ON/OFF and should be used, i.e. when this function is called, we want to use a Wikidata identifier.
func SetWikidataNoPRONOM ¶ added in v1.9.0
func SetWikidataNoPRONOM() func() private
SetWikidataNoPRONOM will turn native PRONOM patterns off in the final identifier output.
func SetWikidataPRONOM ¶ added in v1.9.0
func SetWikidataPRONOM() func() private
SetWikidataPRONOM will turn native PRONOM patterns on in the final identifier output.
func Signature ¶
func Signature() string
Signature returns the path to the siegfried signature file.
func SignatureBase ¶
func SignatureBase() string
SignatureBase returns the filename of the siegfried signature file.
func UpdateOptions ¶
UpdateOptions returns the update URL, timeout and transport for the sf -update command.
func UserAgent ¶ added in v1.9.0
func UserAgent() string
UserAgent returns the siegbot User-Agent string for http requests.
func Verbose ¶ added in v1.10.0
func Verbose() bool
Verbose reports whether to build signatures with verbose logging output
func WikibaseBOF ¶ added in v1.9.2
func WikibaseBOF() string
WikibaseBOF will return the configured BOF property from the Wikibase configuration.
func WikibaseEOF ¶ added in v1.9.2
func WikibaseEOF() string
WikibaseEOF will return the configured EOF property from the Wikibase configuration.
func WikibasePronom ¶ added in v1.9.2
func WikibasePronom() string
WikibasePronom will return the configured PRONOM property from the Wikibase configuration.
func WikibasePropsPath ¶ added in v1.9.2
func WikibasePropsPath() string
WikibasePropsPath returns the file path expected for the configuration needed to tell roy how to interpret results from a custom Wikibase query.
Example:
{ "PronomProp": "http://wikibase.example.com/entity/Q2", "BofProp": "http://wikibase.example.com/entity/Q3", "EofProp": "http://wikibase.example.com/entity/Q4" }
func WikibaseSparqlFile ¶ added in v1.9.2
func WikibaseSparqlFile() string
WikibaseSparqlFile returns the file path expected for a custom Wikibase sparql query file. This file is needed to query a custom instance in the majority of cases. It is unlikely a host Wikibase will use the same configured properties and entities.
func WikidataDebug ¶ added in v1.9.0
func WikidataDebug() bool
WikidataDebug will return the status of the debug flag, i.e. true for debug linting messages, false for none.
func WikidataDefinitionsFile ¶ added in v1.9.0
func WikidataDefinitionsFile() string
WikidataDefinitionsFile returns the name of the file used to store the signature definitions.
func WikidataDefinitionsPath ¶ added in v1.9.0
func WikidataDefinitionsPath() string
WikidataDefinitionsPath is a helper for convenience from callers to point directly at the definitions path for reading/writing as required.
func WikidataEndpoint ¶ added in v1.9.0
func WikidataEndpoint() string
WikidataEndpoint returns the SPARQL endpoint to call when harvesting Wikidata definitions.
func WikidataFileMode ¶ added in v1.9.0
WikidataFileMode returns the file-mode required to save the definitions file.
func WikidataHome ¶ added in v1.9.0
func WikidataHome() string
WikidataHome describes where the files needed by Siegfried and Roy for their Wikidata component reside.
func WikidataLang ¶ added in v1.9.0
func WikidataLang() string
WikidataLang returns the language we want to return results in from Wikidata.
func WikidataSPARQL ¶ added in v1.9.0
func WikidataSPARQL() string
WikidataSPARQL returns the SPARQL query required to harvest Wikidata definitions.
func WikidataSPARQLRevisionParam ¶ added in v1.9.2
func WikidataSPARQLRevisionParam() string
WikidataSPARQLRevisionParam returns the SPARQL parameter (?param) that returns the QID for the record that we want to return revision history and permalink for. E.g. ?uriLabl may return QID: Q12345. This will then be used to query Wikibase for its revision history.
func WikidataWikibaseURL ¶ added in v1.9.2
func WikidataWikibaseURL() string
WikidataWikibaseURL returns the configured Wikibase URL, i.e. the URL that enables permalinks to be returned from Wikibase.
Types ¶
type Archive ¶
type Archive int
Archive is a file format capable of decompression by sf.
const ( None Archive = iota // None means the format cannot be decompressed by sf. Zip // Zip describes a Zip type archive. Gzip // Gzip describes a Gzip type archive. Tar // Tar describes a Tar type archive. ARC // ARC describes an ARC web archive. WARC // WARC describes a WARC web archive. )
Archive type enum.
type Multi ¶
type Multi int
Multi defines how identifiers treat multiple results.
const ( Single Multi = iota // Return a single result. If there is more than one result with the highest score, return UNKNOWN and a warning Conclusive // Default. Return only the results with the highest score. Positive // Return any result with a strong score (or if only weak results, return all). This means a byte match, container match or XML match. Text/MIME/extension-only matches are considered weak. Comprehensive // Same as positive but also turn off the priority rules during byte matching. Exhaustive // Turn off priority rules during byte matching and return all weak as well as strong results. DROID // Turn off priority rules during byte matching but apply priorities to results with strong score after matching )