fitz

package module
v1.24.14 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 1, 2024 License: AGPL-3.0 Imports: 10 Imported by: 73

README

go-fitz

Build Status GoDoc Go Report Card

Go wrapper for the MuPDF fitz library that can extract pages from PDF, EPUB, MOBI, DOCX, XLSX and PPTX documents as IMG, TXT, HTML or SVG.

Build tags

  • extlib - use external MuPDF library
  • static - build with static external MuPDF library (used with extlib)
  • pkgconfig - enable pkg-config (used with extlib)
  • musl - use musl compiled library
  • nocgo - experimental purego implementation (can also be used with CGO_ENABLED=0)

Notes

The bundled libraries are built without CJK fonts; if you need them, you must use the external library.

Calling e.g. Image() or Text() methods concurrently for the same document is not supported.

The purego implementation requires the libffi and libmupdf shared libraries at runtime. You must set fitz.FzVersion in your code, or set the FZ_VERSION environment variable, to the exact version of the shared library.

Example

package main

import (
	"fmt"
	"image/jpeg"
	"os"
	"path/filepath"

	"github.com/gen2brain/go-fitz"
)

// main opens test.pdf, creates a fresh temporary directory and writes every
// page of the document into it as a JPEG file named testNNN.jpg. Any error
// aborts the program with a panic.
func main() {
	doc, err := fitz.New("test.pdf")
	if err != nil {
		panic(err)
	}

	defer doc.Close()

	tmpDir, err := os.MkdirTemp(os.TempDir(), "fitz")
	if err != nil {
		panic(err)
	}

	// Extract pages as images
	for n := 0; n < doc.NumPage(); n++ {
		img, err := doc.Image(n)
		if err != nil {
			panic(err)
		}

		f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.jpg", n)))
		if err != nil {
			panic(err)
		}

		// Use a keyed field: unkeyed literals of imported structs are flagged
		// by go vet and break if the struct ever gains a field.
		err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
		if err != nil {
			panic(err)
		}

		// The file was just written, so treat a failed Close like any other
		// error instead of silently dropping it.
		err = f.Close()
		if err != nil {
			panic(err)
		}
	}
}

Documentation

Overview

Package fitz provides a wrapper for the [MuPDF](http://mupdf.com/) fitz library that can extract pages from PDF, EPUB, MOBI, DOCX, XLSX and PPTX documents as IMG, TXT, HTML or SVG.

Index

Examples

Constants

This section is empty.

Variables

View Source
// Exported error values of the package. Each one is created with errors.New
// and carries a fixed message prefixed with "fitz: ", so callers can compare
// against them (for example with errors.Is).
// NOTE(review): which function returns which error is not visible here —
// confirm against the implementation before relying on a specific mapping.
var (
	ErrNoSuchFile      = errors.New("fitz: no such file")
	ErrCreateContext   = errors.New("fitz: cannot create context")
	ErrOpenDocument    = errors.New("fitz: cannot open document")
	ErrOpenMemory      = errors.New("fitz: cannot open memory")
	ErrLoadPage        = errors.New("fitz: cannot load page")
	ErrRunPageContents = errors.New("fitz: cannot run page contents")
	ErrPageMissing     = errors.New("fitz: page missing")
	ErrCreatePixmap    = errors.New("fitz: cannot create pixmap")
	ErrPixmapSamples   = errors.New("fitz: cannot get pixmap samples")
	ErrNeedsPassword   = errors.New("fitz: document needs password")
	ErrLoadOutline     = errors.New("fitz: cannot load outline")
)

Errors.

View Source
var FzVersion = "1.24.9"

FzVersion is used for the experimental purego implementation; it must be exactly the same as the libmupdf shared library version. It is also possible to set the `FZ_VERSION` environment variable.

View Source
var MaxStore = 256 << 20

MaxStore is the maximum size in bytes of the resource store, before it will start evicting cached resources such as fonts and images.

Functions

This section is empty.

Types

type Document

type Document struct {
	// contains filtered or unexported fields
}

Document represents fitz document.

func New

func New(filename string) (f *Document, err error)

New returns new fitz document.

Example
// Open the document; every error in this example aborts with a panic.
doc, err := fitz.New("test.pdf")
if err != nil {
	panic(err)
}

defer doc.Close()

// All extracted files are written into a fresh temporary directory.
tmpDir, err := os.MkdirTemp(os.TempDir(), "fitz")
if err != nil {
	panic(err)
}

// Extract pages as images
for n := 0; n < doc.NumPage(); n++ {
	img, err := doc.Image(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.jpg", n)))
	if err != nil {
		panic(err)
	}

	err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
	if err != nil {
		panic(err)
	}

	// The file was just written, so a failed Close is handled like any
	// other error rather than being dropped.
	err = f.Close()
	if err != nil {
		panic(err)
	}
}

// Extract pages as text
for n := 0; n < doc.NumPage(); n++ {
	text, err := doc.Text(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.txt", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(text)
	if err != nil {
		panic(err)
	}

	err = f.Close()
	if err != nil {
		panic(err)
	}
}

// Extract pages as html
for n := 0; n < doc.NumPage(); n++ {
	html, err := doc.HTML(n, true)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.html", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(html)
	if err != nil {
		panic(err)
	}

	err = f.Close()
	if err != nil {
		panic(err)
	}
}

// Extract pages as svg
for n := 0; n < doc.NumPage(); n++ {
	svg, err := doc.SVG(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.svg", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(svg)
	if err != nil {
		panic(err)
	}

	err = f.Close()
	if err != nil {
		panic(err)
	}
}
Output:

func NewFromMemory

func NewFromMemory(b []byte) (f *Document, err error)

NewFromMemory returns new fitz document from byte slice.

func NewFromReader

func NewFromReader(r io.Reader) (f *Document, err error)

NewFromReader returns new fitz document from io.Reader.

func (*Document) Bound added in v1.22.2

func (f *Document) Bound(pageNumber int) (image.Rectangle, error)

Bound gives the Bounds of a given Page in the document.

func (*Document) Close

func (f *Document) Close() error

Close closes the underlying fitz document.

func (*Document) HTML

func (f *Document) HTML(pageNumber int, header bool) (string, error)

HTML returns html for given page number.

func (*Document) Image

func (f *Document) Image(pageNumber int) (*image.RGBA, error)

Image returns image for given page number.

func (*Document) ImageDPI

func (f *Document) ImageDPI(pageNumber int, dpi float64) (*image.RGBA, error)

ImageDPI returns image for given page number and DPI.

func (*Document) ImagePNG

func (f *Document) ImagePNG(pageNumber int, dpi float64) ([]byte, error)

ImagePNG returns image for given page number as PNG bytes.

func (f *Document) Links(pageNumber int) ([]Link, error)

Links returns slice of links for given page number.

func (*Document) Metadata

func (f *Document) Metadata() map[string]string

Metadata returns the map with standard metadata.

func (*Document) NumPage

func (f *Document) NumPage() int

NumPage returns total number of pages in document.

func (*Document) SVG

func (f *Document) SVG(pageNumber int) (string, error)

SVG returns svg document for given page number.

func (*Document) Text

func (f *Document) Text(pageNumber int) (string, error)

Text returns text for given page number.

func (*Document) ToC

func (f *Document) ToC() ([]Outline, error)

ToC returns the table of contents (also known as outline).

// Link is the element type of the slice returned by (*Document).Links for a
// given page number.
type Link struct {
	// URI of the link.
	// NOTE(review): presumably the link's destination — confirm against the implementation.
	URI string
}

Link type.

type Outline

// Outline is a single entry of the table of contents; (*Document).ToC
// returns a slice of these.
type Outline struct {
	// Hierarchy level of the entry (starting from 1).
	Level int
	// Title of outline item.
	Title string
	// Destination in the document to be displayed when this outline item is activated.
	URI string
	// The page number of an internal link.
	Page int
	// Top.
	// NOTE(review): presumably the vertical position of the destination on the page — units and origin are not visible here; confirm.
	Top float64
}

Outline type.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL