xmlparser

package module
v1.4.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 26, 2020 License: BSD-3-Clause Imports: 5 Imported by: 0

README

xml stream parser

xml-stream-parser is an XML parser for Go. It parses large XML data efficiently in a streaming fashion.

Usage

<?xml version="1.0" encoding="UTF-8"?>
<bookstore number="2" loc="273456">
   <book>
      <title>The Iliad and The Odyssey</title>
      <price>12.95</price>
      <comments>
         <userComment rating="4">Best translation I've read.</userComment>
         <userComment rating="2">I like other versions better.</userComment>
      </comments>
   </book>
   <book>
      <title>Anthology of World Literature</title>
      <price>24.95</price>
      <comments>
         <userComment rating="3">Needs more modern literature.</userComment>
         <userComment rating="4">Excellent overview of world literature.</userComment>
      </comments>
   </book>
   <journal>
      <title>Journal of XML parsing</title>
      <issue>1</issue>
   </journal>
</bookstore>

Stream over books and journals

f, _ := os.Open("input.xml")
br := bufio.NewReaderSize(f,65536)
parser := xmlparser.NewXMLParser(br, "book", "journal")

for xml := range parser.Stream() {
   fmt.Println(xml.Childs["title"][0].InnerText)
   if xml.Name == "book" {
      fmt.Println(xml.Childs["comments"][0].Childs["userComment"][0].Attrs["rating"])
      fmt.Println(xml.Childs["comments"][0].Childs["userComment"][0].InnerText)
   }
}

Skip tags for speed

parser := xmlparser.NewXMLParser(br, "book").SkipElements([]string{"price", "comments"})

Attributes only

parser := xmlparser.NewXMLParser(br, "bookstore", "book").ParseAttributesOnly("bookstore")

Error handling

for xml := range parser.Stream() {
   if xml.Err != nil {
      // handle error
   }
}

Progress of parsing

// total bytes read so far; use it to calculate the progress of parsing
parser.TotalReadSize

Xpath query (for now this feature is not tagged)


p := xmlparser.NewXMLParser(bufreader, "bookstore").EnableXpath()

for xml := range p.Stream() {
   // select books 
   xml.SelectElements("//book")
   xml.SelectElements("./book")
   xml.SelectElements("book")
   // select titles
   xml.SelectElements("./book/title")
   // select book with price condition
   xml.SelectElements("//book[price>=20.95]")
   // select comments with rating 4
   xml.SelectElements("//book/comments/userComment[@rating='4']")
}
// to evaluate a function or reuse an existing xpath expression
// sum of all the book prices
expr, err := p.CompileXpath("sum(//book/price)")
price := expr.Evaluate(p.CreateXPathNavigator(xml)).(float64)

XPath functionality is implemented via the xpath library; check its documentation for more examples.

If you are interested, also check out the json parser, which works similarly.

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type XMLElement

type XMLElement struct {
	Name      string
	Attrs     map[string]string
	InnerText string
	Childs    map[string][]XMLElement
	Err       error
	// contains filtered or unexported fields
}

func (*XMLElement) FirstChild added in v1.3.0

func (n *XMLElement) FirstChild() *XMLElement

func (*XMLElement) LastChild added in v1.3.0

func (n *XMLElement) LastChild() *XMLElement

func (*XMLElement) NextSibling added in v1.3.0

func (n *XMLElement) NextSibling() *XMLElement

func (*XMLElement) PrevSibling added in v1.3.0

func (n *XMLElement) PrevSibling() *XMLElement

func (*XMLElement) SelectElement added in v1.3.0

func (n *XMLElement) SelectElement(exp string) (*XMLElement, error)

SelectElement finds a child element matching the specified xpath expression.

func (*XMLElement) SelectElements added in v1.3.0

func (n *XMLElement) SelectElements(exp string) ([]*XMLElement, error)

SelectElements finds child elements with the specified xpath expression.

type XMLParser

type XMLParser struct {
	TotalReadSize uint64
	// contains filtered or unexported fields
}

func NewXMLParser added in v1.1.1

func NewXMLParser(reader *bufio.Reader, loopElements ...string) *XMLParser

func (*XMLParser) CompileXpath added in v1.3.0

func (x *XMLParser) CompileXpath(expr string) (*xpath.Expr, error)

CompileXpath compiles the given xpath expression.

func (*XMLParser) CreateXPathNavigator added in v1.3.0

func (x *XMLParser) CreateXPathNavigator(top *XMLElement) *XmlNodeNavigator

CreateXPathNavigator creates a new xpath.NodeNavigator for the specified XMLElement.

func (*XMLParser) EnableXpath added in v1.3.0

func (x *XMLParser) EnableXpath() *XMLParser

func (*XMLParser) ParseAttributesOnly added in v1.3.0

func (x *XMLParser) ParseAttributesOnly(loopElements ...string) *XMLParser

func (*XMLParser) SkipElements added in v1.1.0

func (x *XMLParser) SkipElements(skipElements []string) *XMLParser

func (*XMLParser) SkipOuterElements added in v1.1.5

func (x *XMLParser) SkipOuterElements() *XMLParser

By default, skipping elements applies only to the children of the streamed elements; if this method is called, the parser also skips outer elements.

func (*XMLParser) Stream added in v1.1.0

func (x *XMLParser) Stream() chan *XMLElement

type XmlNodeNavigator added in v1.3.0

type XmlNodeNavigator struct {
	// contains filtered or unexported fields
}

func (*XmlNodeNavigator) Copy added in v1.3.0

func (*XmlNodeNavigator) Current added in v1.3.0

func (x *XmlNodeNavigator) Current() *XMLElement

func (*XmlNodeNavigator) LocalName added in v1.3.0

func (x *XmlNodeNavigator) LocalName() string

func (*XmlNodeNavigator) MoveTo added in v1.3.0

func (x *XmlNodeNavigator) MoveTo(other xpath.NodeNavigator) bool

func (*XmlNodeNavigator) MoveToChild added in v1.3.0

func (x *XmlNodeNavigator) MoveToChild() bool

func (*XmlNodeNavigator) MoveToFirst added in v1.3.0

func (x *XmlNodeNavigator) MoveToFirst() bool

func (*XmlNodeNavigator) MoveToNext added in v1.3.0

func (x *XmlNodeNavigator) MoveToNext() bool

func (*XmlNodeNavigator) MoveToNextAttribute added in v1.3.0

func (x *XmlNodeNavigator) MoveToNextAttribute() bool

func (*XmlNodeNavigator) MoveToParent added in v1.3.0

func (x *XmlNodeNavigator) MoveToParent() bool

func (*XmlNodeNavigator) MoveToPrevious added in v1.3.0

func (x *XmlNodeNavigator) MoveToPrevious() bool

func (*XmlNodeNavigator) MoveToRoot added in v1.3.0

func (x *XmlNodeNavigator) MoveToRoot()

func (*XmlNodeNavigator) NodeType added in v1.3.0

func (x *XmlNodeNavigator) NodeType() xpath.NodeType

func (*XmlNodeNavigator) Prefix added in v1.3.0

func (x *XmlNodeNavigator) Prefix() string

func (*XmlNodeNavigator) String added in v1.3.0

func (x *XmlNodeNavigator) String() string

func (*XmlNodeNavigator) Value added in v1.3.0

func (x *XmlNodeNavigator) Value() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL