Versions in this module
Expand all Collapse all

v1

v1.2.0 Dec 27, 2020

v1.1.0 Jul 14, 2020
Changes in this version
+ const MaxLineProcessed
+ func SavePlan(confHansardType HansardType, workingDir string, sourcePDFPath string, ...)
+ func SetupSplitPlanFixture(testDir string, fixtureDir string, scenarioDir string, sourcePDFPath string, ...) error
+ func Split(t string, c string) []string
+ type HansardDocument struct
    + HansardQuestions []HansardQuestion
    + HansardType HansardType
    + ParliamentSession string
    + func LoadSplitHansardDocPlanFromFile(confHansardType HansardType, workingDir string, sourcePDFPath string) *HansardDocument
    + func NewHansardDocument(pdfPath string) (*HansardDocument, error)
    + func (hd *HansardDocument) Finalize()
    + func (hd *HansardDocument) PersistForSplit(absoluteRawDataPath string) error
    + func (hd *HansardDocument) ProcessLinesExcerpt(pageNum int, linesExcerpt []string) error
    + func (hd *HansardDocument) ShowQuestions()
    + func (hd *HansardDocument) ShowState()
    + func (hd *HansardDocument) SplitPDFByQuestions() error
    + func (hd *HansardDocument) String()
+ type HansardPage struct
+ type HansardQuestion struct
    + PageNumEnd int
    + PageNumStart int
    + QuestionNum string
    + func NewHansardQuestion(pageNumStart int, possibleQuestionNum string) (*HansardQuestion, error)
+ type HansardType int
    + const HANSARD_SPOKEN
    + const HANSARD_WRITTEN
+ type PDFDocument struct
    + NumPages int
    + Pages []PDFPage
    + func NewPDFDoc(sourcePath string) (*PDFDocument, error)
+ type PDFPage struct
    + PDFPlainText string
    + PDFTxtSameLines []string
    + PDFTxtSameStyles []string
    + PageNo int
+ type QuestionStatus int
    + const QUESTION_EXTRACTED
    + const QUESTION_NOT_SEEN
    + const QUESTION_SEEN
+ type SplitHansardDocumentPlan struct
    + func NewSplitHansardDocumentPlan(confHansardType HansardType, workingDir string, sourcePDFPath string) *SplitHansardDocumentPlan
    + func (shdp *SplitHansardDocumentPlan) ExecuteSplit(label string, hq HansardQuestion)