Documentation ¶
Index ¶
- Constants
- Variables
- func AddPlaceholderDelimiter(s string) string
- func IsDelimitedPlaceholder(s string) bool
- func NewFragmentID() int
- func NewRunID() int
- func RemovePlaceholderDelimiter(s string) string
- func ResetFragmentIdCounter()
- func ResetRunIdCounter()
- func ValidatePositions(document []byte, runs []*Run) error
- type Document
- func (d *Document) Close()
- func (d *Document) GetFile(fileName string) []byte
- func (d *Document) Placeholders() (placeholders []*Placeholder)
- func (d *Document) Replace(key, value string) error
- func (d *Document) ReplaceAll(placeholderMap PlaceholderMap) error
- func (d *Document) Runs() (runs []*Run)
- func (d *Document) SetFile(fileName string, fileBytes []byte) error
- func (d *Document) Write(writer io.Writer) error
- func (d *Document) WriteToFile(file string) error
- type DocumentRuns
- type FileMap
- type Placeholder
- type PlaceholderFragment
- func (p PlaceholderFragment) EndPos() int64
- func (p *PlaceholderFragment) ShiftAll(deltaLength int64)
- func (p *PlaceholderFragment) ShiftCut(cutLength int64)
- func (p *PlaceholderFragment) ShiftReplace(deltaLength int64)
- func (p PlaceholderFragment) StartPos() int64
- func (p PlaceholderFragment) String(docBytes []byte) string
- func (p PlaceholderFragment) Text(docBytes []byte) string
- func (p PlaceholderFragment) TextLength(docBytes []byte) int64
- func (p PlaceholderFragment) Valid() bool
- type PlaceholderMap
- type Position
- type Reader
- type Replacer
- type Run
- type RunParser
- type TagPair
Constants ¶
const ( // RunElementName is the local name of the XML tag for runs (<w:r>, </w:r> and <w:r/>) RunElementName = "r" // TextElementName is the local name of the XML tag for text-runs (<w:t> and </w:t>) TextElementName = "t" )
const ( // OpenDelimiter defines the opening delimiter for the placeholders used inside a docx-document. OpenDelimiter rune = '{' // CloseDelimiter defines the closing delimiter for the placeholders used inside a docx-document. CloseDelimiter rune = '}' )
const (
// DocumentXml is the relative path where the actual document content resides inside the docx-archive.
DocumentXml = "word/document.xml"
)
Variables ¶
var ( // HeaderPathRegex matches all header files inside the docx-archive. HeaderPathRegex = regexp.MustCompile(`word/header[0-9]*.xml`) FooterPathRegex = regexp.MustCompile(`word/footer[0-9]*.xml`) )
var ( // RunOpenTagRegex matches all OpenTags for runs, including any attributes that may be set RunOpenTagRegex = regexp.MustCompile(`(<w:r).*>`) // RunCloseTagRegex matches the close tag of runs RunCloseTagRegex = regexp.MustCompile(`(</w:r>)`) // RunSingletonTagRegex matches a singleton run tag RunSingletonTagRegex = regexp.MustCompile(`(<w:r/>)`) // TextOpenTagRegex matches all OpenTags for text-runs, including any attributes that may be set TextOpenTagRegex = regexp.MustCompile(`(<w:t).*>`) // TextCloseTagRegex matches the close tag of text-runs TextCloseTagRegex = regexp.MustCompile(`(</w:t>)`) // ErrTagsInvalid is returned if the parsing failed and the result cannot be used. // Typically this means that one or more tag-offsets were not parsed correctly which // would cause the document to become corrupted as soon as replacing starts. ErrTagsInvalid = errors.New("one or more tags are invalid and will cause the XML to be corrupt") )
var ( // OpenDelimiterRegex is used to quickly match the opening delimiter and find its positions. OpenDelimiterRegex = regexp.MustCompile(string(OpenDelimiter)) // CloseDelimiterRegex is used to quickly match the closing delimiter and find its positions. CloseDelimiterRegex = regexp.MustCompile(string(CloseDelimiter)) )
var ( // ErrPlaceholderNotFound is returned if there is no placeholder inside the document. ErrPlaceholderNotFound = errors.New("placeholder not found in document") )
Functions ¶
func AddPlaceholderDelimiter ¶
AddPlaceholderDelimiter will wrap the given string with OpenDelimiter and CloseDelimiter. If the given string is already a delimited placeholder, it is returned unchanged.
func IsDelimitedPlaceholder ¶
IsDelimitedPlaceholder returns true if the given string is a delimited placeholder. It checks whether the first and last runes in the string are the OpenDelimiter and CloseDelimiter respectively. If the string is empty, false is returned.
func RemovePlaceholderDelimiter ¶
RemovePlaceholderDelimiter removes OpenDelimiter and CloseDelimiter from the given text. If the given text is not a delimited placeholder, it is returned unchanged.
func ResetFragmentIdCounter ¶
func ResetFragmentIdCounter()
ResetFragmentIdCounter will reset the fragmentId counter to 0
func ResetRunIdCounter ¶
func ResetRunIdCounter()
ResetRunIdCounter will reset the runId counter to 0
func ValidatePositions ¶
ValidatePositions will iterate over all runs and their texts (if any) and ensure that they match their respective regex. If the validation failed, the replacement will not work since offsets are wrong.
Types ¶
type Document ¶
type Document struct {
// contains filtered or unexported fields
}
Document exposes the main API of the library. It represents the actual docx document which is going to be modified. Although a 'docx' document actually consists of multiple xml files, that fact is not exposed via the Document API. All actions on the Document propagate through the files of the docx-zip-archive.
func Open ¶
Open will open and parse the file pointed to by path. The file must be a valid docx file or an error is returned.
func OpenBytes ¶
OpenBytes creates a Document from a byte slice. It behaves just like Open().
Note: In this case, the docxFile property will be nil!
func (*Document) Placeholders ¶
func (d *Document) Placeholders() (placeholders []*Placeholder)
Placeholders returns all placeholders from the docx document.
func (*Document) Replace ¶
Replace will attempt to replace the given key with the value in every file.
func (*Document) ReplaceAll ¶
func (d *Document) ReplaceAll(placeholderMap PlaceholderMap) error
ReplaceAll will iterate over all files and perform the replacement according to the PlaceholderMap.
func (*Document) SetFile ¶
SetFile allows setting the file contents of the given file. The fileName must be known, otherwise an error is returned.
func (*Document) Write ¶
Write is responsible for assembling a new .docx file using the modified data as well as all remaining files. Docx files are basically zip archives with many XMLs included. Files which cannot be modified through this lib will just be read from the original docx and copied into the writer.
func (*Document) WriteToFile ¶
WriteToFile will write the document to a new file. It is important to note that the target file cannot be the same as the path of this document. If the path is not yet created, the function will attempt to MkdirAll() before creating the file.
type DocumentRuns ¶
type DocumentRuns []*Run
DocumentRuns is a convenience type used to describe a slice of runs. It also implements Push() and Pop(), which allows it to be used as a LIFO stack.
func (*DocumentRuns) Pop ¶
func (dr *DocumentRuns) Pop() *Run
Pop will return the last Run added to the stack and remove it.
func (*DocumentRuns) Push ¶
func (dr *DocumentRuns) Push(run *Run)
Push will push a new Run onto the DocumentRuns stack
func (DocumentRuns) WithText ¶
func (dr DocumentRuns) WithText() DocumentRuns
WithText returns all runs with the HasText flag set
type Placeholder ¶
type Placeholder struct {
Fragments []*PlaceholderFragment
}
Placeholder is the internal representation of a parsed placeholder from the docx-archive. A placeholder usually consists of multiple PlaceholderFragments which specify the relative byte-offsets of the fragment inside the underlying byte-data.
func ParsePlaceholders ¶
func ParsePlaceholders(runs DocumentRuns, docBytes []byte) (placeholders []*Placeholder, err error)
ParsePlaceholders will, given the document run positions and the bytes, parse out all placeholders including their fragments.
func (Placeholder) EndPos ¶
func (p Placeholder) EndPos() int64
EndPos returns the absolute end position of the placeholder.
func (Placeholder) StartPos ¶
func (p Placeholder) StartPos() int64
StartPos returns the absolute start position of the placeholder.
func (Placeholder) Text ¶
func (p Placeholder) Text(docBytes []byte) string
Text assembles the placeholder fragments using the given docBytes and returns the full placeholder literal.
func (Placeholder) Valid ¶
func (p Placeholder) Valid() bool
Valid determines whether the placeholder can be used. A placeholder is considered valid, if all fragments are valid.
type PlaceholderFragment ¶
type PlaceholderFragment struct { ID int // ID is used to identify the fragments globally. Position Position // Position of the actual fragment within the run text. 0 == (Run.Text.OpenTag.End + 1) Number int // numbering fragments for ease of use. Numbering is scoped to placeholders. Run *Run }
PlaceholderFragment is a part of a placeholder within the document.xml. If the full placeholder is e.g. '{foo-bar}', the placeholder might be ripped apart according to the WordprocessingML spec. It is therefore very likely that a placeholder is split into multiple fragments (e.g. '{foo' and '-bar}').
func NewPlaceholderFragment ¶
func NewPlaceholderFragment(number int, pos Position, run *Run) *PlaceholderFragment
NewPlaceholderFragment returns an initialized PlaceholderFragment with a new, auto-incremented, ID.
func (PlaceholderFragment) EndPos ¶
func (p PlaceholderFragment) EndPos() int64
EndPos returns the absolute end position of the fragment.
func (*PlaceholderFragment) ShiftAll ¶
func (p *PlaceholderFragment) ShiftAll(deltaLength int64)
ShiftAll will shift all fragment position markers by the given amount. The function is used if the underlying byte-data changed and the whole PlaceholderFragment needs to be shifted to a new position to be correct again.
For example, 10 bytes were added to the document and this PlaceholderFragment is positioned after that change inside the document. In that case one needs to shift the fragment by +10 bytes using ShiftAll(10).
func (*PlaceholderFragment) ShiftCut ¶
func (p *PlaceholderFragment) ShiftCut(cutLength int64)
ShiftCut will shift the fragment position markers in such a way that the fragment can be considered empty. This is used in order to preserve the correct positions of the tags.
The function is used if the actual value (text-run value) of the fragment has been removed. For example, the fragment text was 'remove-me' (9 bytes). If that data was removed from the document, the positions (not all positions) of the fragment need to be adjusted. The text positions are set equal (start == end).
func (*PlaceholderFragment) ShiftReplace ¶
func (p *PlaceholderFragment) ShiftReplace(deltaLength int64)
ShiftReplace is used to adjust the fragment positions after the text value has been replaced. The function is used if the text-value of the fragment has been replaced with different bytes. For example, the fragment text was 'placeholder' (11 bytes), which is replaced with 'a-super-awesome-value' (21 bytes). In that case the deltaLength would be 10. In order to accommodate the change in bytes you'd need to call ShiftReplace(10).
func (PlaceholderFragment) StartPos ¶
func (p PlaceholderFragment) StartPos() int64
StartPos returns the absolute start position of the fragment.
func (PlaceholderFragment) String ¶
func (p PlaceholderFragment) String(docBytes []byte) string
String spits out the most important bits and pieces of a fragment and can be used for debugging purposes.
func (PlaceholderFragment) Text ¶
func (p PlaceholderFragment) Text(docBytes []byte) string
Text returns the actual text of the fragment given the source bytes. If the given byte slice is not large enough for the offsets, an empty string is returned.
func (PlaceholderFragment) TextLength ¶
func (p PlaceholderFragment) TextLength(docBytes []byte) int64
TextLength returns the actual length of the fragment given a byte source.
func (PlaceholderFragment) Valid ¶
func (p PlaceholderFragment) Valid() bool
Valid returns true if all positions of the fragment are valid.
type PlaceholderMap ¶
type PlaceholderMap map[string]interface{}
PlaceholderMap is the type used to map the placeholder keys (without delimiters) to the replacement values
type Position ¶
Position is a generic position of a tag, represented by byte offsets
type Reader ¶
type Reader struct {
// contains filtered or unexported fields
}
Reader is a very basic io.Reader implementation which is capable of returning the current position.
type Replacer ¶
type Replacer struct { ReplaceCount int BytesChanged int64 // contains filtered or unexported fields }
Replacer is the key struct which works on the parsed DOCX document.
func NewReplacer ¶
func NewReplacer(docBytes []byte, placeholder []*Placeholder) *Replacer
NewReplacer returns a new Replacer.
func (*Replacer) Bytes ¶
Bytes returns the document bytes. If called after Replace(), the bytes will be modified.
type Run ¶
type Run struct { TagPair ID int Text TagPair // Text is the <w:t> tag pair which is always within a run and cannot be standalone. HasText bool }
Run defines a non-block region of text with a common set of properties. It is specified with the <w:r> element. In our case the run is specified by four byte positions (start and end tag).
func NewEmptyRun ¶
func NewEmptyRun() *Run
NewEmptyRun returns a new, empty run which has only an ID set.
type RunParser ¶
type RunParser struct {
// contains filtered or unexported fields
}
RunParser can parse a list of Runs from a given byte slice.
func NewRunParser ¶
NewRunParser returns an initialized RunParser given the source-bytes.
func (*RunParser) Execute ¶
Execute will fire up the parser. The parser will do two passes on the given document. First, all <w:r> tags are located and marked. Then, inside those run tags, the <w:t> tags are located.
func (*RunParser) Runs ¶
func (parser *RunParser) Runs() DocumentRuns
Runs returns all runs found by the parser.