fitz

package module
v0.0.0-...-5066a94 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 20, 2022 License: AGPL-3.0 Imports: 9 Imported by: 0

README

go-fitz

Go wrapper for the MuPDF fitz library that can extract pages from PDF and EPUB documents as images, text, html or svg.

Install

go get -u github.com/karmdip-mi/go-fitz

Build tags

  • extlib - use external MuPDF library
  • static - build with static external MuPDF library (used with extlib)
  • nopie - use this with GCC older than 7

Example

package main

import (
	"fmt"
	"image/jpeg"
	"io/ioutil"
	"os"
	"path/filepath"

	"github.com/karmdip-mi/go-fitz"
)

// main opens test.pdf and writes every page into a fresh temporary directory
// three times over: as a JPEG image, as plain text and as HTML. Any failure
// aborts the program with a panic, which keeps the example short.
func main() {
	doc, err := fitz.New("test.pdf")
	if err != nil {
		panic(err)
	}

	defer doc.Close()

	tmpDir, err := ioutil.TempDir(os.TempDir(), "fitz")
	if err != nil {
		panic(err)
	}

	// Extract pages as images
	for n := 0; n < doc.NumPage(); n++ {
		img, err := doc.Image(n)
		if err != nil {
			panic(err)
		}

		f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.jpg", n)))
		if err != nil {
			panic(err)
		}

		// Keyed field: go vet rejects unkeyed literals of imported struct types.
		err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
		if err != nil {
			panic(err)
		}

		// A failed Close on a freshly written file must not go unnoticed.
		if err = f.Close(); err != nil {
			panic(err)
		}
	}

	// Extract pages as text
	for n := 0; n < doc.NumPage(); n++ {
		text, err := doc.Text(n)
		if err != nil {
			panic(err)
		}

		f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.txt", n)))
		if err != nil {
			panic(err)
		}

		_, err = f.WriteString(text)
		if err != nil {
			panic(err)
		}

		if err = f.Close(); err != nil {
			panic(err)
		}
	}

	// Extract pages as html
	for n := 0; n < doc.NumPage(); n++ {
		// The second argument is HTML's header parameter.
		html, err := doc.HTML(n, true)
		if err != nil {
			panic(err)
		}

		f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.html", n)))
		if err != nil {
			panic(err)
		}

		_, err = f.WriteString(html)
		if err != nil {
			panic(err)
		}

		if err = f.Close(); err != nil {
			panic(err)
		}
	}
}

Documentation

Overview

Package fitz provides a wrapper for the [MuPDF](http://mupdf.com/) fitz library that can extract pages from PDF and EPUB documents as images, text, html or svg.

Index

Examples

Constants

This section is empty.

Variables

View Source
// Sentinel errors defined by package fitz. Every message carries the "fitz: "
// prefix followed by a short description of the failing step.
var (
	ErrNoSuchFile    = errors.New("fitz: no such file")
	ErrCreateContext = errors.New("fitz: cannot create context")
	ErrOpenDocument  = errors.New("fitz: cannot open document")
	ErrOpenMemory    = errors.New("fitz: cannot open memory")
	ErrPageMissing   = errors.New("fitz: page missing")
	ErrCreatePixmap  = errors.New("fitz: cannot create pixmap")
	ErrPixmapSamples = errors.New("fitz: cannot get pixmap samples")
	ErrNeedsPassword = errors.New("fitz: document needs password")
	ErrLoadOutline   = errors.New("fitz: cannot load outline")
)

Errors.

Functions

This section is empty.

Types

type Document

// Document represents a fitz document. All of its fields are unexported.
type Document struct {
	// contains filtered or unexported fields
}

Document represents fitz document.

func New

func New(filename string) (f *Document, err error)

New returns new fitz document.

Example
// Open the document; every error in this example is treated as fatal.
doc, err := New("test.pdf")
if err != nil {
	panic(err)
}

defer doc.Close()

// Scratch directory under the OS temp dir that receives every extracted file.
tmpDir, err := ioutil.TempDir(os.TempDir(), "fitz")
if err != nil {
	panic(err)
}

// Extract pages as images: one test%03d.jpg per page index,
// with n running from 0 up to NumPage()-1.
for n := 0; n < doc.NumPage(); n++ {
	img, err := doc.Image(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.jpg", n)))
	if err != nil {
		panic(err)
	}

	// Encode at the standard library's default JPEG quality.
	err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
	if err != nil {
		panic(err)
	}

	// NOTE(review): the error returned by Close is discarded here.
	f.Close()
}

// Extract pages as text: one test%03d.txt per page index.
for n := 0; n < doc.NumPage(); n++ {
	text, err := doc.Text(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.txt", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(text)
	if err != nil {
		panic(err)
	}

	f.Close()
}

// Extract pages as html: one test%03d.html per page index.
for n := 0; n < doc.NumPage(); n++ {
	// The second argument is HTML's header parameter.
	html, err := doc.HTML(n, true)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.html", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(html)
	if err != nil {
		panic(err)
	}

	f.Close()
}

// Extract pages as svg: one test%03d.svg per page index.
for n := 0; n < doc.NumPage(); n++ {
	svg, err := doc.SVG(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.svg", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(svg)
	if err != nil {
		panic(err)
	}

	f.Close()
}
Output:

func NewFromMemory

func NewFromMemory(b []byte) (f *Document, err error)

NewFromMemory returns new fitz document from byte slice.

func NewFromReader

func NewFromReader(r io.Reader) (f *Document, err error)

NewFromReader returns new fitz document from io.Reader.

func (*Document) Close

func (f *Document) Close() error

Close closes the underlying fitz document.

func (*Document) HTML

func (f *Document) HTML(pageNumber int, header bool) (string, error)

HTML returns html for given page number.

func (*Document) Image

func (f *Document) Image(pageNumber int) (image.Image, error)

Image returns image for given page number.

func (*Document) ImageDPI

func (f *Document) ImageDPI(pageNumber int, dpi float64) (image.Image, error)

ImageDPI returns image for given page number and DPI.

func (*Document) ImagePNG

func (f *Document) ImagePNG(pageNumber int, dpi float64) ([]byte, error)

ImagePNG returns image for given page number as PNG bytes.

func (*Document) Metadata

func (f *Document) Metadata() map[string]string

Metadata returns the map with standard metadata.

func (*Document) NumPage

func (f *Document) NumPage() int

NumPage returns total number of pages in document.

func (*Document) SVG

func (f *Document) SVG(pageNumber int) (string, error)

SVG returns svg document for given page number.

func (*Document) Text

func (f *Document) Text(pageNumber int) (string, error)

Text returns text for given page number.

func (*Document) ToC

func (f *Document) ToC() ([]Outline, error)

ToC returns the table of contents (also known as outline).

type Outline

// Outline is a single entry of a document's table of contents, the element
// type of the slice returned by Document.ToC.
type Outline struct {
	// Hierarchy level of the entry (starting from 1).
	Level int
	// Title of outline item.
	Title string
	// Destination in the document to be displayed when this outline item is activated.
	URI string
	// The page number of an internal link.
	Page int
	// Top value of the entry — presumably the vertical position of the
	// destination on the target page; TODO confirm units and origin.
	Top float64
}

Outline type.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL