Documentation
¶
Overview ¶
Package textractor provides functionality to work with the Amazon Textract service.
Index ¶
- type AnalyzeDocumentPage
- type AnalyzeDocumentSchema
- type AnalyzeExpenseJSONResponse
- type AnalyzeExpensePage
- type BoundingBox
- func (bb *BoundingBox) Bottom() float32
- func (bb *BoundingBox) Height() float32
- func (bb *BoundingBox) HorizontalCenter() float32
- func (bb *BoundingBox) Intersection(other *BoundingBox) *BoundingBox
- func (bb *BoundingBox) Left() float32
- func (bb *BoundingBox) Right() float32
- func (bb *BoundingBox) String() string
- func (bb *BoundingBox) Top() float32
- func (bb *BoundingBox) Union(other *BoundingBox) *BoundingBox
- func (bb *BoundingBox) VerticalCenter() float32
- func (bb *BoundingBox) Width() float32
- type Cell
- type Content
- type Document
- type Field
- type FieldKey
- func (c *FieldKey) Block() types.Block
- func (c *FieldKey) Confidence() float32
- func (c *FieldKey) Geometry() *Geometry
- func (c *FieldKey) ID() string
- func (fk *FieldKey) OCRConfidence() *OCRConfidence
- func (fk *FieldKey) String() string
- func (fk *FieldKey) Text() string
- func (fk *FieldKey) Words() []*Word
- type FieldValue
- func (c *FieldValue) Block() types.Block
- func (c *FieldValue) Confidence() float32
- func (c *FieldValue) Geometry() *Geometry
- func (c *FieldValue) ID() string
- func (fv *FieldValue) OCRConfidence() *OCRConfidence
- func (fv *FieldValue) SelectionElement() *SelectionElement
- func (fv *FieldValue) String() string
- func (fv *FieldValue) Text() string
- func (fv *FieldValue) Words() []*Word
- type Form
- type Geometry
- type IdentityDocument
- type Line
- type OCRConfidence
- type Orientation
- type Page
- func (p *Page) Blocks() []types.Block
- func (p *Page) Form() *Form
- func (p *Page) Geometry() *Geometry
- func (p *Page) ID() string
- func (p *Page) LineAtIndex(i int) *Line
- func (p *Page) LineCount() int
- func (p *Page) Lines() []*Line
- func (p *Page) Queries() Queries
- func (p *Page) Signatures() []*Signature
- func (p *Page) TableAtIndex(i int) *Table
- func (p *Page) TableCount() int
- func (p *Page) Tables() []*Table
- func (p *Page) Text() string
- type Point
- type Queries
- type Query
- type QueryResult
- type Row
- type SelectionElement
- type Signature
- type Table
- type Word
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type AnalyzeDocumentPage ¶ added in v0.0.2
func NewAnalyzeDocumentPageFromJSON ¶ added in v0.0.2
func NewAnalyzeDocumentPageFromJSON(data []byte) (*AnalyzeDocumentPage, error)
type AnalyzeDocumentSchema ¶ added in v0.0.2
type AnalyzeDocumentSchema struct { DocumentMetadata struct { Pages *int32 `json:"Pages"` } `json:"DocumentMetadata"` Blocks []struct { BlockType string `json:"BlockType"` ColumnIndex *int32 `json:"ColumnIndex"` ColumnSpan *int32 `json:"ColumnSpan"` ID *string `json:"Id"` Confidence *float32 `json:"Confidence"` Text *string `json:"Text"` EntityTypes []string `json:"EntityTypes"` Geometry struct { BoundingBox struct { Width float32 `json:"Width"` Height float32 `json:"Height"` Left float32 `json:"Left"` Top float32 `json:"Top"` } `json:"BoundingBox"` Polygon []struct { X float32 `json:"X"` Y float32 `json:"Y"` } `json:"Polygon"` } `json:"Geometry"` Relationships []struct { Type string `json:"Type"` IDs []string `json:"Ids"` } `json:"Relationships"` } `json:"Blocks"` }
type AnalyzeExpenseJSONResponse ¶ added in v0.0.2
type AnalyzeExpenseJSONResponse struct { DocumentMetadata struct { Pages *int32 `json:"Pages"` } `json:"DocumentMetadata"` ExpenseDocuments []struct { LineItemGroups []struct { LineItemGroupIndex *int32 `json:"LineItemGroupIndex"` LineItems []struct{} `json:"LineItems"` } `json:"LineItemGroups"` SummaryFields []struct{} `json:"SummaryFields"` } `json:"ExpenseDocuments"` }
type AnalyzeExpensePage ¶ added in v0.0.2
type AnalyzeExpensePage struct {
ExpenseDocuments []types.ExpenseDocument
}
type BoundingBox ¶
type BoundingBox struct {
// contains filtered or unexported fields
}
BoundingBox represents the bounding box of a geometry.
func NewBoundingBox ¶
func NewBoundingBox(boundingBox *types.BoundingBox) *BoundingBox
NewBoundingBox creates a new BoundingBox instance.
func (*BoundingBox) Bottom ¶
func (bb *BoundingBox) Bottom() float32
Bottom returns the bottom coordinate of the bounding box.
func (*BoundingBox) Height ¶
func (bb *BoundingBox) Height() float32
Height returns the height of the bounding box.
func (*BoundingBox) HorizontalCenter ¶
func (bb *BoundingBox) HorizontalCenter() float32
HorizontalCenter returns the horizontal center coordinate of the bounding box.
func (*BoundingBox) Intersection ¶
func (bb *BoundingBox) Intersection(other *BoundingBox) *BoundingBox
Intersection returns a new bounding box that represents the intersection of two bounding boxes.
func (*BoundingBox) Left ¶
func (bb *BoundingBox) Left() float32
Left returns the left coordinate of the bounding box.
func (*BoundingBox) Right ¶
func (bb *BoundingBox) Right() float32
Right returns the right coordinate of the bounding box.
func (*BoundingBox) String ¶ added in v0.0.2
func (bb *BoundingBox) String() string
String returns a string representation of the bounding box.
func (*BoundingBox) Top ¶
func (bb *BoundingBox) Top() float32
Top returns the top coordinate of the bounding box.
func (*BoundingBox) Union ¶
func (bb *BoundingBox) Union(other *BoundingBox) *BoundingBox
Union returns a new bounding box that represents the union of two bounding boxes.
func (*BoundingBox) VerticalCenter ¶
func (bb *BoundingBox) VerticalCenter() float32
VerticalCenter returns the vertical center coordinate of the bounding box.
func (*BoundingBox) Width ¶
func (bb *BoundingBox) Width() float32
Width returns the width of the bounding box.
type Cell ¶
type Cell struct {
// contains filtered or unexported fields
}
func (*Cell) ColumnIndex ¶
func (*Cell) ColumnSpan ¶
func (*Cell) Confidence ¶
func (c *Cell) Confidence() float32
Confidence returns the confidence level of the content.
type Content ¶
type Content interface { ID() string Confidence() float32 Geometry() *Geometry Block() types.Block }
Content is an interface for document content elements.
type Document ¶
type Document struct {
// contains filtered or unexported fields
}
Document represents a Textract document containing pages.
func NewDocument ¶
func NewDocument(responsePages ...*AnalyzeDocumentPage) *Document
NewDocument creates a new Document instance using response pages from Textract.
func (*Document) PageNumber ¶
PageNumber retrieves a page by its page number.
type Field ¶
type Field struct {
// contains filtered or unexported fields
}
Field represents a form field, consisting of a key and a value.
func (*Field) Confidence ¶
Confidence calculates the confidence score for the form field.
func (*Field) OCRConfidence ¶ added in v0.0.2
func (f *Field) OCRConfidence() *OCRConfidence
OCRConfidence calculates the OCR confidence for the form field.
func (*Field) Value ¶
func (f *Field) Value() *FieldValue
Value returns the value part of the form field.
type FieldKey ¶
type FieldKey struct {
// contains filtered or unexported fields
}
FieldKey represents the key part of a form field.
func NewFieldKey ¶
NewFieldKey creates a new FieldKey instance.
func (*FieldKey) Confidence ¶
func (c *FieldKey) Confidence() float32
Confidence returns the confidence level of the content.
func (*FieldKey) Geometry ¶
func (c *FieldKey) Geometry() *Geometry
Geometry returns the geometry information of the content.
func (*FieldKey) OCRConfidence ¶ added in v0.0.2
func (fk *FieldKey) OCRConfidence() *OCRConfidence
OCRConfidence calculates the OCR confidence for the field key.
type FieldValue ¶
type FieldValue struct {
// contains filtered or unexported fields
}
FieldValue represents the value part of a form field.
func NewFieldValue ¶
NewFieldValue creates a new FieldValue instance.
func (*FieldValue) Confidence ¶
func (c *FieldValue) Confidence() float32
Confidence returns the confidence level of the content.
func (*FieldValue) Geometry ¶
func (c *FieldValue) Geometry() *Geometry
Geometry returns the geometry information of the content.
func (*FieldValue) OCRConfidence ¶ added in v0.0.2
func (fv *FieldValue) OCRConfidence() *OCRConfidence
OCRConfidence calculates the OCR confidence for the field value.
func (*FieldValue) SelectionElement ¶ added in v0.0.2
func (fv *FieldValue) SelectionElement() *SelectionElement
SelectionElement returns the selection element associated with the field value.
func (*FieldValue) String ¶
func (fv *FieldValue) String() string
String returns the string representation of the field value.
func (*FieldValue) Text ¶
func (fv *FieldValue) Text() string
Text returns the text representation of the field value.
func (*FieldValue) Words ¶ added in v0.0.2
func (fv *FieldValue) Words() []*Word
Words returns the words constituting the field value.
type Form ¶
type Form struct {
// contains filtered or unexported fields
}
Form represents a form extracted from a document.
func (*Form) AddField ¶
AddField adds a field to the form, replacing the existing field if one with the same key and a lower confidence already exists.
func (*Form) FieldByKey ¶
FieldByKey retrieves a field from the form by its key.
func (*Form) SearchFieldByKey ¶ added in v0.0.2
SearchFieldByKey searches for fields in the form with a key containing the specified string. It performs a case-insensitive search on the key text.
type Geometry ¶
type Geometry struct {
// contains filtered or unexported fields
}
Geometry represents the geometric properties of an element.
func NewGeometry ¶
NewGeometry creates a new Geometry instance.
func (*Geometry) BoundingBox ¶
func (g *Geometry) BoundingBox() *BoundingBox
BoundingBox returns the bounding box of the geometry.
func (*Geometry) Orientation ¶
func (g *Geometry) Orientation() *Orientation
Orientation returns the orientation of the geometry.
type IdentityDocument ¶ added in v0.0.2
type IdentityDocument struct{}
func NewIndentiyDocument ¶ added in v0.0.2
func NewIndentiyDocument() *IdentityDocument
type Line ¶
type Line struct {
// contains filtered or unexported fields
}
Line represents a line of text in the document.
func (*Line) Confidence ¶
func (c *Line) Confidence() float32
Confidence returns the confidence level of the content.
type OCRConfidence ¶ added in v0.0.2
type OCRConfidence struct {
// contains filtered or unexported fields
}
OCRConfidence represents the OCR confidence scores, including mean, max, and min values.
func NewOCRConfidenceFromScores ¶ added in v0.0.2
func NewOCRConfidenceFromScores(scores []float32) *OCRConfidence
NewOCRConfidenceFromScores creates a new OCRConfidence instance from a slice of confidence scores. If the scores slice is empty, it returns nil.
func (*OCRConfidence) Max ¶ added in v0.0.2
func (c *OCRConfidence) Max() float32
Max returns the maximum OCR confidence score.
func (*OCRConfidence) Mean ¶ added in v0.0.2
func (c *OCRConfidence) Mean() float32
Mean returns the mean (average) OCR confidence score.
func (*OCRConfidence) Min ¶ added in v0.0.2
func (c *OCRConfidence) Min() float32
Min returns the minimum OCR confidence score.
type Orientation ¶
type Orientation struct {
// contains filtered or unexported fields
}
Orientation represents the orientation of a geometric element.
func NewOrientation ¶
func NewOrientation(point0, point1 *Point) *Orientation
NewOrientation creates a new Orientation instance.
func (*Orientation) Degrees ¶
func (o *Orientation) Degrees() float32
Degrees returns the orientation in degrees.
func (*Orientation) Radians ¶
func (o *Orientation) Radians() float32
Radians returns the orientation in radians.
type Page ¶
type Page struct {
// contains filtered or unexported fields
}
Page represents a page in the document.
func (*Page) LineAtIndex ¶
LineAtIndex returns the line at the specified index.
func (*Page) Signatures ¶ added in v0.0.2
Signatures returns the signatures on the page.
func (*Page) TableAtIndex ¶
TableAtIndex returns the table at the specified index.
func (*Page) TableCount ¶
TableCount returns the total number of tables in the page.
type Point ¶
type Point struct {
// contains filtered or unexported fields
}
Point represents a 2D point.
type Query ¶ added in v0.0.2
type Query struct {
// contains filtered or unexported fields
}
Query represents a Textract query.
func (*Query) ResultsByConfidence ¶ added in v0.0.2
func (q *Query) ResultsByConfidence() []*QueryResult
ResultsByConfidence lists this query instance's results, sorted from most to least confident.
func (*Query) TopResult ¶ added in v0.0.2
func (q *Query) TopResult() *QueryResult
TopResult retrieves the top result by confidence score, if any are available.
type QueryResult ¶ added in v0.0.2
type QueryResult struct {
// contains filtered or unexported fields
}
QueryResult represents the result of a Textract query.
func NewQueryResult ¶ added in v0.0.2
func NewQueryResult(block types.Block) *QueryResult
NewQueryResult creates a new QueryResult instance.
func (*QueryResult) Block ¶ added in v0.0.2
Block returns the underlying types.Block of the content.
func (*QueryResult) Confidence ¶ added in v0.0.2
func (c *QueryResult) Confidence() float32
Confidence returns the confidence level of the content.
func (*QueryResult) Geometry ¶ added in v0.0.2
func (c *QueryResult) Geometry() *Geometry
Geometry returns the geometry information of the content.
func (*QueryResult) ID ¶ added in v0.0.2
func (c *QueryResult) ID() string
ID returns the ID of the content.
func (*QueryResult) Text ¶ added in v0.0.2
func (qr *QueryResult) Text() string
Text returns the text content of the query result.
type Row ¶
type Row struct {
// contains filtered or unexported fields
}
func (Row) Confidence ¶
func (c Row) Confidence() float32
Confidence returns the confidence level of the content.
type SelectionElement ¶
type SelectionElement struct {
// contains filtered or unexported fields
}
SelectionElement represents a selectable element in the document.
func NewSelectionElement ¶
func NewSelectionElement(block types.Block) *SelectionElement
NewSelectionElement creates a new SelectionElement instance.
func (*SelectionElement) Confidence ¶
func (c *SelectionElement) Confidence() float32
Confidence returns the confidence level of the content.
func (*SelectionElement) Geometry ¶
func (c *SelectionElement) Geometry() *Geometry
Geometry returns the geometry information of the content.
func (*SelectionElement) ID ¶
func (c *SelectionElement) ID() string
ID returns the ID of the content.
func (*SelectionElement) IsSelected ¶
func (se *SelectionElement) IsSelected() bool
IsSelected checks if the element is selected.
func (*SelectionElement) Status ¶
func (se *SelectionElement) Status() types.SelectionStatus
Status returns the selection status of the element.
type Signature ¶ added in v0.0.2
type Signature struct {
// contains filtered or unexported fields
}
Signature represents a signature in a document.
func NewSignature ¶ added in v0.0.2
NewSignature creates a new Signature instance.
func (*Signature) Confidence ¶ added in v0.0.2
func (c *Signature) Confidence() float32
Confidence returns the confidence level of the content.
type Table ¶
type Table struct {
// contains filtered or unexported fields
}
func (Table) Confidence ¶
func (c Table) Confidence() float32
Confidence returns the confidence level of the content.
type Word ¶
type Word struct {
// contains filtered or unexported fields
}
Word represents a word in the document.
func (*Word) Confidence ¶
func (c *Word) Confidence() float32
Confidence returns the confidence level of the content.
func (*Word) Geometry ¶
func (c *Word) Geometry() *Geometry
Geometry returns the geometry information of the content.
func (*Word) IsHandwriting ¶
IsHandwriting checks if the word is handwriting.