docx

package module

v0.5.1 Latest Latest Go to latest Published: Oct 12, 2024 License: MIT Imports: 16 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/xpfo-go/go-docx

Links

Open Source Insights

README ¶

➤ License

This software is licensed under the MIT license.

Documentation ¶

Index ¶

Constants
Variables
func AddPlaceholderDelimiter(s string) string
func ChangeOpenCloseDelimiter(openDelimiter, closeDelimiter string)
func IsDelimitedPlaceholder(s string) bool
func NewFragmentID() int
func NewRunID() int
func RemovePlaceholderDelimiter(s string) string
func ResetFragmentIdCounter()
func ResetRunIdCounter()
func ValidatePositions(document []byte, runs []*Run) error
type Document
- func Open(path string) (*Document, error)
- func OpenBytes(b []byte) (*Document, error)
- func (d *Document) Close()
- func (d *Document) GetFile(fileName string) []byte
- func (d *Document) GetPlaceHoldersList() ([]string, error)
- func (d *Document) Placeholders() (placeholders []*Placeholder)
- func (d *Document) Replace(key, value string) error
- func (d *Document) ReplaceAll(placeholderMap PlaceholderMap) error
- func (d *Document) Runs() (runs []*Run)
- func (d *Document) SetFile(fileName string, fileBytes []byte) error
- func (d *Document) Write(writer io.Writer) error
- func (d *Document) WriteToFile(file string) error
type DocumentRuns
- func (dr *DocumentRuns) Pop() *Run
- func (dr *DocumentRuns) Push(run *Run)
- func (dr DocumentRuns) WithText() DocumentRuns
type FileMap
- func (fm FileMap) Write(writer io.Writer, filename string) error
type Placeholder
- func ParsePlaceholders(runs DocumentRuns, docBytes []byte) (placeholders []*Placeholder, err error)
- func (p Placeholder) EndPos() int64
- func (p Placeholder) StartPos() int64
- func (p Placeholder) Text(docBytes []byte) string
- func (p Placeholder) Valid() bool
type PlaceholderFragment
- func NewPlaceholderFragment(number int, pos Position, run *Run) *PlaceholderFragment
- func (p PlaceholderFragment) EndPos() int64
- func (p *PlaceholderFragment) ShiftAll(deltaLength int64)
- func (p *PlaceholderFragment) ShiftCut(cutLength int64)
- func (p *PlaceholderFragment) ShiftReplace(deltaLength int64)
- func (p PlaceholderFragment) StartPos() int64
- func (p PlaceholderFragment) String(docBytes []byte) string
- func (p PlaceholderFragment) Text(docBytes []byte) string
- func (p PlaceholderFragment) TextLength(docBytes []byte) int64
- func (p PlaceholderFragment) Valid() bool
type PlaceholderMap
type Position
- func (p Position) Match(regexp *regexp.Regexp, data []byte) bool
- func (p Position) Valid() bool
type Reader
- func NewReader(s string) *Reader
- func (r *Reader) Len() int
- func (r *Reader) Pos() int64
- func (r *Reader) Read(b []byte) (int, error)
- func (r *Reader) ReadByte() (byte, error)
- func (r *Reader) Size() int64
- func (r *Reader) String() string
type Replacer
- func NewReplacer(docBytes []byte, placeholder []*Placeholder) *Replacer
- func (r *Replacer) Bytes() []byte
- func (r *Replacer) Replace(placeholderKey string, value string) error
type Run
- func NewEmptyRun() *Run
- func (r *Run) GetText(documentBytes []byte) string
- func (r *Run) String(bytes []byte) string
type RunParser
- func NewRunParser(doc []byte) *RunParser
- func (parser *RunParser) Execute() error
- func (parser *RunParser) Runs() DocumentRuns
type TagPair

Constants ¶

View Source

const (
	// RunElementName is the local name of the XML tag for runs (<w:r>, </w:r> and <w:r/>)
	RunElementName = "r"
	// TextElementName is the local name of the XML tag for text-runs (<w:t> and </w:t>)
	TextElementName = "t"
)

View Source

const (
	// DocumentXml is the relative path where the actual document content resides inside the docx-archive.
	DocumentXml = "word/document.xml"
)

Variables ¶

View Source

var (
	// HeaderPathRegex matches all header files inside the docx-archive.
	HeaderPathRegex = regexp.MustCompile(`word/header[0-9]*.xml`)
	// FooterPathRegex matches all footer files inside the docx-archive.
	FooterPathRegex = regexp.MustCompile(`word/footer[0-9]*.xml`)
	// MediaPathRegex matches all media files inside the docx-archive.
	MediaPathRegex = regexp.MustCompile(`word/media/*`)
)

View Source

var (
	// RunOpenTagRegex matches all open tags for runs, including any attributes that may be set
	RunOpenTagRegex = regexp.MustCompile(`(<w:r).*>`)
	// RunCloseTagRegex matches the close tag of runs
	RunCloseTagRegex = regexp.MustCompile(`(</w:r>)`)
	// RunSingletonTagRegex matches a singleton run tag
	RunSingletonTagRegex = regexp.MustCompile(`(<w:r/>)`)
	// TextOpenTagRegex matches all open tags for text-runs, including any attributes that may be set
	TextOpenTagRegex = regexp.MustCompile(`(<w:t).*>`)
	// TextCloseTagRegex matches the close tag of text-runs
	TextCloseTagRegex = regexp.MustCompile(`(</w:t>)`)
	// ErrTagsInvalid is returned if the parsing failed and the result cannot be used.
	// Typically this means that one or more tag-offsets were not parsed correctly which
	// would cause the document to become corrupted as soon as replacing starts.
	ErrTagsInvalid = errors.New("one or more tags are invalid and will cause the XML to be corrupt")
)

View Source

var (
	// OpenDelimiter defines the opening delimiter for the placeholders used inside a docx-document.
	OpenDelimiter string = "@"
	// CloseDelimiter defines the closing delimiter for the placeholders used inside a docx-document.
	CloseDelimiter string = "#"
)

View Source

var (
	// OpenDelimiterRegex is used to quickly match the opening delimiter and find its positions.
	OpenDelimiterRegex = regexp.MustCompile(string(OpenDelimiter))
	// CloseDelimiterRegex is used to quickly match the closing delimiter and find its positions.
	CloseDelimiterRegex = regexp.MustCompile(string(CloseDelimiter))
)

View Source

var (
	// ErrPlaceholderNotFound is returned if there is no placeholder inside the document.
	ErrPlaceholderNotFound = errors.New("placeholder not found in document")
)

Functions ¶

func AddPlaceholderDelimiter ¶

func AddPlaceholderDelimiter(s string) string

AddPlaceholderDelimiter will wrap the given string with OpenDelimiter and CloseDelimiter. If the given string is already a delimited placeholder, it is returned unchanged.

func ChangeOpenCloseDelimiter ¶

func ChangeOpenCloseDelimiter(openDelimiter, closeDelimiter string)

ChangeOpenCloseDelimiter is used to change the open and close delimiters.

func IsDelimitedPlaceholder ¶

func IsDelimitedPlaceholder(s string) bool

IsDelimitedPlaceholder returns true if the given string is a delimited placeholder. It checks whether the first and last rune in the string is the OpenDelimiter and CloseDelimiter respectively. If the string is empty, false is returned.

func NewFragmentID ¶

func NewFragmentID() int

NewFragmentID returns the next Fragment.ID

func NewRunID ¶

func NewRunID() int

NewRunID returns the next Run.ID

func RemovePlaceholderDelimiter ¶

func RemovePlaceholderDelimiter(s string) string

RemovePlaceholderDelimiter removes OpenDelimiter and CloseDelimiter from the given text. If the given text is not a delimited placeholder, it is returned unchanged.

func ResetFragmentIdCounter ¶

func ResetFragmentIdCounter()

ResetFragmentIdCounter will reset the fragmentId counter to 0

func ResetRunIdCounter ¶

func ResetRunIdCounter()

ResetRunIdCounter will reset the runId counter to 0

func ValidatePositions ¶

func ValidatePositions(document []byte, runs []*Run) error

ValidatePositions will iterate over all runs and their texts (if any) and ensure that they match their respective regex. If the validation failed, the replacement will not work since offsets are wrong.

Types ¶

type Document ¶

type Document struct {
	// contains filtered or unexported fields
}

Document exposes the main API of the library. It represents the actual docx document which is going to be modified. Although a 'docx' document actually consists of multiple xml files, that fact is not exposed via the Document API. All actions on the Document propagate through the files of the docx-zip-archive.

func Open ¶

func Open(path string) (*Document, error)

Open will open and parse the file pointed to by path. The file must be a valid docx file or an error is returned.

func OpenBytes ¶

func OpenBytes(b []byte) (*Document, error)

OpenBytes creates a Document from a byte slice. It behaves just like Open().

Note: In this case, the docxFile property will be nil!

func (*Document) Close ¶

func (d *Document) Close()

Close will close everything :)

func (*Document) GetFile ¶

func (d *Document) GetFile(fileName string) []byte

GetFile returns the content of the given fileName if it exists.

func (*Document) GetPlaceHoldersList ¶

func (d *Document) GetPlaceHoldersList() ([]string, error)

GetPlaceHoldersList returns the placeholders in a human-readable form.

func (*Document) Placeholders ¶

func (d *Document) Placeholders() (placeholders []*Placeholder)

Placeholders returns all placeholders from the docx document.

func (*Document) Replace ¶

func (d *Document) Replace(key, value string) error

Replace will attempt to replace the given key with the value in every file.

func (*Document) ReplaceAll ¶

func (d *Document) ReplaceAll(placeholderMap PlaceholderMap) error

ReplaceAll will iterate over all files and perform the replacement according to the PlaceholderMap.

func (*Document) Runs ¶

func (d *Document) Runs() (runs []*Run)

Runs returns all runs from all parsed files.

func (*Document) SetFile ¶

func (d *Document) SetFile(fileName string, fileBytes []byte) error

SetFile allows setting the file contents of the given file. The fileName must be known, otherwise an error is returned.

func (*Document) Write ¶

func (d *Document) Write(writer io.Writer) error

Write is responsible for assembling a new .docx docxFile using the modified data as well as all remaining files. Docx files are basically zip archives with many XMLs included. Files which cannot be modified through this lib will just be read from the original docx and copied into the writer.

func (*Document) WriteToFile ¶

func (d *Document) WriteToFile(file string) error

WriteToFile will write the document to a new file. It is important to note that the target file cannot be the same as the path of this document. If the path is not yet created, the function will attempt to MkdirAll() before creating the file.

type DocumentRuns ¶

type DocumentRuns []*Run

DocumentRuns is a convenience type used to describe a slice of runs. It also implements Push() and Pop() which allows it to be used as LIFO stack.

func (*DocumentRuns) Pop ¶

func (dr *DocumentRuns) Pop() *Run

Pop will return the last Run added to the stack and remove it.

func (*DocumentRuns) Push ¶

func (dr *DocumentRuns) Push(run *Run)

Push will push a new Run onto the DocumentRuns stack

func (DocumentRuns) WithText ¶

func (dr DocumentRuns) WithText() DocumentRuns

WithText returns all runs with the HasText flag set

type FileMap ¶

type FileMap map[string][]byte

FileMap is just a convenience type for the map of fileName => fileBytes

func (FileMap) Write ¶

func (fm FileMap) Write(writer io.Writer, filename string) error

Write will try to write the bytes from the map into the given writer.

type Placeholder ¶

type Placeholder struct {
	Fragments []*PlaceholderFragment
}

Placeholder is the internal representation of a parsed placeholder from the docx-archive. A placeholder usually consists of multiple PlaceholderFragments which specify the relative byte-offsets of the fragment inside the underlying byte-data.

func ParsePlaceholders ¶

func ParsePlaceholders(runs DocumentRuns, docBytes []byte) (placeholders []*Placeholder, err error)

ParsePlaceholders will, given the document run positions and the bytes, parse out all placeholders including their fragments.

func (Placeholder) EndPos ¶

func (p Placeholder) EndPos() int64

EndPos returns the absolute end position of the placeholder.

func (Placeholder) StartPos ¶

func (p Placeholder) StartPos() int64

StartPos returns the absolute start position of the placeholder.

func (Placeholder) Text ¶

func (p Placeholder) Text(docBytes []byte) string

Text assembles the placeholder fragments using the given docBytes and returns the full placeholder literal.

func (Placeholder) Valid ¶

func (p Placeholder) Valid() bool

Valid determines whether the placeholder can be used. A placeholder is considered valid, if all fragments are valid.

type PlaceholderFragment ¶

type PlaceholderFragment struct {
	ID       int      // ID is used to identify the fragments globally.
	Position Position // Position of the actual fragment within the run text. 0 == (Run.Text.OpenTag.End + 1)
	Number   int      // numbering fragments for ease of use. Numbering is scoped to placeholders.
	Run      *Run
}

PlaceholderFragment is a part of a placeholder within the document.xml. If the full placeholder is e.g. '{foo-bar}', the placeholder might be ripped apart according to the WordprocessingML spec. It is therefore likely that a placeholder is split into multiple fragments (e.g. '{foo' and '-bar}').

func NewPlaceholderFragment ¶

func NewPlaceholderFragment(number int, pos Position, run *Run) *PlaceholderFragment

NewPlaceholderFragment returns an initialized PlaceholderFragment with a new, auto-incremented, ID.

func (PlaceholderFragment) EndPos ¶

func (p PlaceholderFragment) EndPos() int64

EndPos returns the absolute end position of the fragment.

func (*PlaceholderFragment) ShiftAll ¶

func (p *PlaceholderFragment) ShiftAll(deltaLength int64)

ShiftAll will shift all fragment position markers by the given amount. The function is used if the underlying byte-data changed and the whole PlaceholderFragment needs to be shifted to a new position to be correct again.

For example, 10 bytes were added to the document and this PlaceholderFragment is positioned after that change inside the document. In that case one needs to shift the fragment by +10 bytes using ShiftAll(10).

func (*PlaceholderFragment) ShiftCut ¶

func (p *PlaceholderFragment) ShiftCut(cutLength int64)

ShiftCut will shift the fragment position markers in such a way that the fragment can be considered empty. This is used in order to preserve the correct positions of the tags.

The function is used if the actual value (text-run value) of the fragment has been removed. For example, the fragment text was 'remove-me' (9 bytes). If that data was removed from the document, the positions (not all positions) of the fragment need to be adjusted. The text positions are set equal (start == end).

func (*PlaceholderFragment) ShiftReplace ¶

func (p *PlaceholderFragment) ShiftReplace(deltaLength int64)

ShiftReplace is used to adjust the fragment positions after the text value has been replaced. The function is used if the text-value of the fragment has been replaced with different bytes. For example, the fragment text was 'placeholder' (11 bytes) which is replaced with 'a-super-awesome-value' (21 bytes). In that case the deltaLength would be 10. In order to accommodate the change in bytes you'd need to call ShiftReplace(10).

func (PlaceholderFragment) StartPos ¶

func (p PlaceholderFragment) StartPos() int64

StartPos returns the absolute start position of the fragment.

func (PlaceholderFragment) String ¶

func (p PlaceholderFragment) String(docBytes []byte) string

String spits out the most important bits and pieces of a fragment and can be used for debugging purposes.

func (PlaceholderFragment) Text ¶

func (p PlaceholderFragment) Text(docBytes []byte) string

Text returns the actual text of the fragment given the source bytes. If the given byte slice is not large enough for the offsets, an empty string is returned.

func (PlaceholderFragment) TextLength ¶

func (p PlaceholderFragment) TextLength(docBytes []byte) int64

TextLength returns the actual length of the fragment given a byte source.

func (PlaceholderFragment) Valid ¶

func (p PlaceholderFragment) Valid() bool

Valid returns true if all positions of the fragment are valid.

type PlaceholderMap ¶

type PlaceholderMap map[string]interface{}

PlaceholderMap is the type used to map the placeholder keys (without delimiters) to the replacement values

type Position ¶

type Position struct {
	Start int64
	End   int64
}

Position is a generic position of a tag, represented by byte offsets

func (Position) Match ¶

func (p Position) Match(regexp *regexp.Regexp, data []byte) bool

Match will apply a MatchString using the given regex on the given data and returns true if the position matches the regex inside the data.

func (Position) Valid ¶

func (p Position) Valid() bool

Valid returns true if Start <= End. Only then the position can be used, otherwise there will be a 'slice out of bounds' along the way.

type Reader ¶

type Reader struct {
	// contains filtered or unexported fields
}

Reader is a very basic io.Reader implementation which is capable of returning the current position.

func NewReader ¶

func NewReader(s string) *Reader

NewReader returns a new Reader given a string source.

func (*Reader) Len ¶

func (r *Reader) Len() int

Len returns the current length of the stream which has been read.

func (*Reader) Pos ¶

func (r *Reader) Pos() int64

Pos returns the current position which the reader is at.

func (*Reader) Read ¶

func (r *Reader) Read(b []byte) (int, error)

Read implements the io.Reader interface.

func (*Reader) ReadByte ¶

func (r *Reader) ReadByte() (byte, error)

ReadByte implements the io.ByteReader interface.

func (*Reader) Size ¶

func (r *Reader) Size() int64

Size returns the size of the string to read.

func (*Reader) String ¶

func (r *Reader) String() string

String implements the Stringer interface.

type Replacer ¶

type Replacer struct {
	ReplaceCount int
	BytesChanged int64
	// contains filtered or unexported fields
}

Replacer is the key struct which works on the parsed DOCX document.

func NewReplacer ¶

func NewReplacer(docBytes []byte, placeholder []*Placeholder) *Replacer

NewReplacer returns a new Replacer.

func (*Replacer) Bytes ¶

func (r *Replacer) Bytes() []byte

Bytes returns the document bytes. If called after Replace(), the bytes will be modified.

func (*Replacer) Replace ¶

func (r *Replacer) Replace(placeholderKey string, value string) error

Replace will replace all occurrences of the placeholderKey with the given value. The function is synced with a mutex as it is not concurrency safe.

type Run ¶

type Run struct {
	TagPair
	ID      int
	Text    TagPair // Text is the <w:t> tag pair which is always within a run and cannot be standalone.
	HasText bool
}

Run defines a non-block region of text with a common set of properties. It is specified with the <w:r> element. In our case the run is specified by four byte positions (start and end tag).

func NewEmptyRun ¶

func NewEmptyRun() *Run

NewEmptyRun returns a new, empty run which has only an ID set.

func (*Run) GetText ¶

func (r *Run) GetText(documentBytes []byte) string

GetText returns the text of the run, if any. If the run does not have a text or the given byte slice is too small, an empty string is returned.

func (*Run) String ¶

func (r *Run) String(bytes []byte) string

String returns a string representation of the run, given the source bytes. It may be helpful in debugging.

type RunParser ¶

type RunParser struct {
	// contains filtered or unexported fields
}

RunParser can parse a list of Runs from a given byte slice.

func NewRunParser ¶

func NewRunParser(doc []byte) *RunParser

NewRunParser returns an initialized RunParser given the source-bytes.

func (*RunParser) Execute ¶

func (parser *RunParser) Execute() error

Execute will fire up the parser. The parser will do two passes on the given document. First, all <w:r> tags are located and marked. Then, inside those run tags the <w:t> tags are located.

func (*RunParser) Runs ¶

func (parser *RunParser) Runs() DocumentRuns

Runs returns all runs found by the parser.

type TagPair ¶

type TagPair struct {
	OpenTag  Position
	CloseTag Position
}

TagPair describes an opening and closing tag position.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL