Documentation ¶
Overview ¶
Package zim implements reading support for the ZIM File Format.
Index ¶
- Constants
- type Cluster
- type DirectoryEntry
- func (e *DirectoryEntry) BlobNumber() uint32
- func (e *DirectoryEntry) ClusterNumber() uint32
- func (e *DirectoryEntry) IsArticle() bool
- func (e *DirectoryEntry) IsDeletedEntry() bool
- func (e *DirectoryEntry) IsLinkTarget() bool
- func (e *DirectoryEntry) IsRedirect() bool
- func (e *DirectoryEntry) Mimetype() Mimetype
- func (e *DirectoryEntry) Namespace() Namespace
- func (e *DirectoryEntry) RedirectIndex() uint32
- func (e *DirectoryEntry) Revision() uint32
- func (e *DirectoryEntry) String() string
- func (e *DirectoryEntry) Title() []byte
- func (e *DirectoryEntry) URL() []byte
- type File
- func (z *File) ArticleCount() uint32
- func (z *File) BlobReader(e *DirectoryEntry) (reader io.Reader, blobSize int64, err error)
- func (z *File) BlobReaderAt(clusterPosition, blobPosition uint32) (reader io.Reader, blobSize int64, err error)
- func (z *File) CalculateChecksum() ([md5.Size]byte, error)
- func (z *File) Close()
- func (z *File) ClusterAt(clusterPosition uint32) (Cluster, error)
- func (z *File) ClusterCount() uint32
- func (z *File) Counter() string
- func (z *File) Creator() string
- func (z *File) Date() string
- func (z *File) Description() string
- func (z *File) EntriesWithNamespace(namespace Namespace, limit int) []DirectoryEntry
- func (z *File) EntriesWithSimilarity(namespace Namespace, prefix []byte, limit int) []DirectoryEntry
- func (z *File) EntriesWithTitlePrefix(namespace Namespace, prefix []byte, limit int) []DirectoryEntry
- func (z *File) EntriesWithURLPrefix(namespace Namespace, prefix []byte, limit int) []DirectoryEntry
- func (z *File) EntryAtTitlePosition(position uint32) (DirectoryEntry, error)
- func (z *File) EntryAtURLPosition(position uint32) (DirectoryEntry, error)
- func (z *File) EntryWithNamespace(namespace Namespace) (entry DirectoryEntry, position uint32, found bool)
- func (z *File) EntryWithTitlePrefix(namespace Namespace, prefix []byte) (entry DirectoryEntry, position uint32, found bool)
- func (z *File) EntryWithURL(namespace Namespace, url []byte) (entry DirectoryEntry, urlPosition uint32, found bool)
- func (z *File) EntryWithURLPrefix(namespace Namespace, prefix []byte) (entry DirectoryEntry, position uint32, found bool)
- func (z *File) Favicon() (entry DirectoryEntry, err error)
- func (z *File) Filename() string
- func (z *File) Filesize() int
- func (z *File) FollowRedirect(redirectEntry *DirectoryEntry) (DirectoryEntry, error)
- func (z *File) InternalChecksum() ([md5.Size]byte, error)
- func (z *File) Language() string
- func (z *File) LayoutPage() (DirectoryEntry, error)
- func (z *File) License() string
- func (z *File) LongDescription() string
- func (z *File) MainPage() (DirectoryEntry, error)
- func (z *File) Metadata() map[string]string
- func (z *File) MetadataFor(key string) string
- func (z *File) MimetypeList() []string
- func (z *File) Name() string
- func (z *File) Publisher() string
- func (z *File) Relation() string
- func (z *File) Source() string
- func (z *File) Tags() string
- func (z *File) Title() string
- func (z *File) UUID() UUID
- func (z *File) ValidateChecksum() error
- func (z *File) Version() (majorVersion, minorVersion uint16)
- type Header
- type Mimetype
- type Namespace
- type UUID
Constants ¶
const ( MagicNumber = uint32(72173914) NoMainPage = ^uint32(0) NoLayoutPage = NoMainPage )
Some useful constants belonging to a ZIM file.
const ( MimetypeDeletedEntry = Mimetype(0xFFFD) MimetypeLinkTarget = Mimetype(0xFFFE) MimetypeRedirectEntry = Mimetype(0xFFFF) )
Possible fixed Mimetype values for Directory Entry.
const ( NamespaceLayout = Namespace('-') // layout, eg. the LayoutPage, CSS, favicon.png (48x48), JavaScript and images not related to the articles NamespaceArticles = Namespace('A') NamespaceArticleMetadata = Namespace('B') NamespaceImagesFiles = Namespace('I') NamespaceImagesText = Namespace('J') NamespaceZimMetadata = Namespace('M') NamespaceCategoriesText = Namespace('U') NamespaceCategoriesArticleList = Namespace('V') NamespaceCategoriesPerArticleCategoryList = Namespace('W') NamespaceFulltextIndex = Namespace('X') // Xapian fulltext index )
Possible values for a Namespace.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Cluster ¶
type Cluster struct {
// contains filtered or unexported fields
}
Cluster stores the uncompressed cluster data (blob positions followed by a sequence of blobs). Each blob belongs to a Directory Entry.
func (*Cluster) BlobAt ¶
BlobAt returns the blob data at the given blob position of the Cluster. This is only useful when iterating over all blobs in a Cluster. When only a single blob of a Cluster should be retrieved, it's better to use z.BlobReaderAt(clusterPosition, blobPosition) instead. Blob positions start at 0; stop iterating as soon as an error is returned.
func (*Cluster) WasCompressed ¶
WasCompressed shows if the cluster data was compressed. This information can be used as an indicator about the cluster contents.
type DirectoryEntry ¶
type DirectoryEntry struct {
// contains filtered or unexported fields
}
DirectoryEntry holds the information about a specific article, image or other object in a ZIM file.
func (*DirectoryEntry) BlobNumber ¶
func (e *DirectoryEntry) BlobNumber() uint32
BlobNumber is the blob number inside the uncompressed cluster, where the contents are stored.
func (*DirectoryEntry) ClusterNumber ¶
func (e *DirectoryEntry) ClusterNumber() uint32
ClusterNumber in which the data of this Directory Entry is stored.
func (*DirectoryEntry) IsArticle ¶
func (e *DirectoryEntry) IsArticle() bool
IsArticle checks whether the Directory Entry is an Article
func (*DirectoryEntry) IsDeletedEntry ¶
func (e *DirectoryEntry) IsDeletedEntry() bool
IsDeletedEntry checks whether the Directory Entry is a DeletedEntry
func (*DirectoryEntry) IsLinkTarget ¶
func (e *DirectoryEntry) IsLinkTarget() bool
IsLinkTarget checks whether the Directory Entry is a LinkTarget
func (*DirectoryEntry) IsRedirect ¶
func (e *DirectoryEntry) IsRedirect() bool
IsRedirect checks whether the Directory Entry is a Redirect to another Directory Entry
func (*DirectoryEntry) Mimetype ¶
func (e *DirectoryEntry) Mimetype() Mimetype
Mimetype is the Mimetype of the Directory Entry.
func (*DirectoryEntry) Namespace ¶
func (e *DirectoryEntry) Namespace() Namespace
Namespace defines to which namespace the Directory Entry belongs.
func (*DirectoryEntry) RedirectIndex ¶
func (e *DirectoryEntry) RedirectIndex() uint32
RedirectIndex is a pointer to the Directory Entry of the Redirect Target.
func (*DirectoryEntry) Revision ¶
func (e *DirectoryEntry) Revision() uint32
Revision identifies a revision of the contents of the Directory Entry, needed to identify updates or revisions in the original history.
func (*DirectoryEntry) String ¶
func (e *DirectoryEntry) String() string
func (*DirectoryEntry) Title ¶
func (e *DirectoryEntry) Title() []byte
Title is the title of the Directory Entry.
func (*DirectoryEntry) URL ¶
func (e *DirectoryEntry) URL() []byte
URL is the URL of the Directory Entry, which is unique for the specific Namespace.
type File ¶
type File struct {
// contains filtered or unexported fields
}
File represents a ZIM file and contains the most important information that is retrieved once and used again.
func (*File) ArticleCount ¶
ArticleCount is the total number of articles defined in the pointerlists of the ZIM file.
func (*File) BlobReader ¶
BlobReader returns a LimitedReader for the blob data of the given Directory Entry.
func (*File) BlobReaderAt ¶
func (z *File) BlobReaderAt(clusterPosition, blobPosition uint32) ( reader io.Reader, blobSize int64, err error)
BlobReaderAt returns a LimitedReader for the blob data at the given positions.
func (*File) CalculateChecksum ¶
CalculateChecksum calculates the MD5 checksum of the ZIM file. This can take some time, depending on the size of the file.
func (*File) ClusterAt ¶
ClusterAt returns the Cluster of the ZIM file at the given cluster position. The complete cluster data is stored uncompressed in memory. If the size of the cluster data is more than 32MB an error is returned and the data is not read into memory. Note: Only use this function when every single blob of a ZIM file needs to be read into memory (for example, when iterating over all contents, where this improves performance).
func (*File) ClusterCount ¶
ClusterCount is the number of clusters the ZIM file contains.
func (*File) Counter ¶
Counter returns a String containing the number of Directory Entries per Mimetype.
func (*File) Description ¶
Description returns the Description of the ZIM file as found in the Metadata.
func (*File) EntriesWithNamespace ¶
func (z *File) EntriesWithNamespace(namespace Namespace, limit int) []DirectoryEntry
EntriesWithNamespace returns the first n Directory Entries in the Namespace where n <= limit. When the Limit is set to <= 0 it gets the default value 100.
func (*File) EntriesWithSimilarity ¶
func (z *File) EntriesWithSimilarity(namespace Namespace, prefix []byte, limit int) []DirectoryEntry
EntriesWithSimilarity returns Directory Entries in the Namespace that have a similar URL prefix or Title prefix to the given one. When the Limit is set to <= 0 it takes the default value 100.
func (*File) EntriesWithTitlePrefix ¶
func (z *File) EntriesWithTitlePrefix(namespace Namespace, prefix []byte, limit int) []DirectoryEntry
EntriesWithTitlePrefix returns all Directory Entries in the Namespace that have the given Title prefix. When the Limit is set to <= 0 it gets the default value 100.
func (*File) EntriesWithURLPrefix ¶
func (z *File) EntriesWithURLPrefix(namespace Namespace, prefix []byte, limit int) []DirectoryEntry
EntriesWithURLPrefix returns all Directory Entries in the Namespace that have the given URL prefix. When the Limit is set to <= 0 it gets the default value 100.
func (*File) EntryAtTitlePosition ¶
func (z *File) EntryAtTitlePosition(position uint32) (DirectoryEntry, error)
EntryAtTitlePosition returns the Directory Entry at the position as defined in the ordered title pointerlist. If 0 <= position < z.ArticleCount() the returned error is nil. Redirects are not followed automatically.
func (*File) EntryAtURLPosition ¶
func (z *File) EntryAtURLPosition(position uint32) (DirectoryEntry, error)
EntryAtURLPosition returns the Directory Entry at the position as defined in the ordered URL pointerlist. If 0 <= position < z.ArticleCount() the returned error is nil. Redirects are not followed automatically.
func (*File) EntryWithNamespace ¶
func (z *File) EntryWithNamespace(namespace Namespace) ( entry DirectoryEntry, position uint32, found bool)
EntryWithNamespace searches for the first Directory Entry in the namespace. If it was found, found is set to true and the returned position will be the position in the URL pointerlist. This can be used to iterate over the next n Directory Entries using z.EntryAtURLPosition(position+n).
func (*File) EntryWithTitlePrefix ¶
func (z *File) EntryWithTitlePrefix(namespace Namespace, prefix []byte) ( entry DirectoryEntry, position uint32, found bool)
EntryWithTitlePrefix searches for the first Directory Entry in the namespace having the given title prefix. If it was found, found is set to true and the returned position will be the position in the title pointerlist. This can be used to iterate over the next n Directory Entries using z.EntryAtTitlePosition(position+n).
func (*File) EntryWithURL ¶
func (z *File) EntryWithURL(namespace Namespace, url []byte) ( entry DirectoryEntry, urlPosition uint32, found bool)
EntryWithURL searches for the Directory Entry with the exact URL. If the Directory Entry was found, found is set to true and the returned position will be the position in the URL pointerlist. This can be used to iterate over the next n Directory Entries using z.EntryAtURLPosition(position+n).
func (*File) EntryWithURLPrefix ¶
func (z *File) EntryWithURLPrefix(namespace Namespace, prefix []byte) ( entry DirectoryEntry, position uint32, found bool)
EntryWithURLPrefix searches for the first Directory Entry in the namespace having the given URL prefix. If it was found, found is set to true and the returned position will be the position in the URL pointerlist. This can be used to iterate over the next n Directory Entries using z.EntryAtURLPosition(position+n).
func (*File) Favicon ¶
func (z *File) Favicon() (entry DirectoryEntry, err error)
Favicon returns the Directory Entry for the Favicon of the ZIM file
func (*File) FollowRedirect ¶
func (z *File) FollowRedirect(redirectEntry *DirectoryEntry) (DirectoryEntry, error)
FollowRedirect returns the target Directory Entry of the given Redirect Entry
func (*File) InternalChecksum ¶
InternalChecksum is the MD5 checksum for the ZIM file. It's precalculated and saved in the header.
func (*File) LayoutPage ¶
func (z *File) LayoutPage() (DirectoryEntry, error)
LayoutPage returns the Directory Entry for the LayoutPage of the ZIM file
func (*File) LongDescription ¶
LongDescription returns the LongDescription of the ZIM file as found in the Metadata.
func (*File) MainPage ¶
func (z *File) MainPage() (DirectoryEntry, error)
MainPage returns the Directory Entry for the MainPage of the ZIM file
func (*File) MetadataFor ¶
MetadataFor returns the metadata value for a given key. If the key is not set, an empty string is returned.
func (*File) MimetypeList ¶
MimetypeList returns the internal Mimetype list of the ZIM file.
func (*File) ValidateChecksum ¶
ValidateChecksum compares the internal MD5 checksum of the ZIM file with the calculated one.
type Header ¶
type Header struct {
// contains filtered or unexported fields
}
Header is the header of a ZIM file
type Mimetype ¶
type Mimetype uint16
Mimetype describes one of the three possible fixed Mimetypes for a Directory Entry.