Documentation ¶
Index ¶
- Variables
- func BillIdToBillNumber(billId string) string
- func BillNumberFromPath(billPath string) string
- func BillNumberToBillId(billNumber string) string
- func CollectWordSample(fpath string, wordSampleStorageChannel chan WordSample, wg *sync.WaitGroup) error
- func CollectWordSamplesFromBills(pathToCongressDataDir string) (allWords []string)
- func CompareBills(parentPath string, billList []string, print bool) ([][]CompareItem, error)
- func CompareBillsfromPaths(docPaths []string, print bool) ([][]CompareItem, error)
- func CompareSamples()
- func CopyFile(src, dst string) error
- func CustomTokenize(text string) (wordList []string)
- func DownloadCommitteesYaml() (downloadpath string, err error)
- func DownloadFile(filepath string, url string) error
- func DownloadLegislatorsYaml() (downloadpath string, err error)
- func ExtractBillMeta(billPath string, billMetaStorageChannel chan BillMeta, sem chan bool, ...) error
- func Find(slice []string, val string) (int, bool)
- func FindNamedMatches(regex *regexp.Regexp, str string) map[string]string
- func GetAllBillNumbers() []string
- func GetBillNumbersByCongress(congress string) []string
- func GetBill_ES(billnumber string) map[string]interface{}
- func GetCompareMap(compareRow []CompareItem) (compareMap map[string]CompareItem)
- func GetCongressIdQuery(congress string) map[string]interface{}
- func GetKeysFromMap(m map[string]interface{}) (keys []string)
- func GetMatchingBillNumberVersions(results SearchResult_ES) (billnumberversions []string)
- func GetMatchingBills(results SearchResult_ES) (billnumbers []string)
- func GetMoreLikeThis_ES(size, minscore int, searchtext string) map[string]interface{}
- func GetSampleBillNumbers() []string
- func GetSimilarBillsDict(similarSectionsItems SimilarSectionsItems, maxBills int) (similarBillsDict map[string]SimilarSections)
- func GetSyncMapKeys(m *sync.Map) (s string)
- func ListDataJsonFiles(pathToCongressDataDir string) (dataJsonFiles []string, err error)
- func ListDocumentXMLFiles(pathToCongressDataDir string) (documentXMLFiles []string, err error)
- func LoadEnv() (err error)
- func LoadMainTitles(mainTitleSyncMap *sync.Map, billMetaSyncMap *sync.Map)
- func LoadTitles(titleSyncMap *sync.Map, billMetaSyncMap *sync.Map)
- func MakeBillQuery(billnumber string) (billquery map[string]interface{})
- func MakeBillsMeta(parentPath string)
- func MakeMLTQuery(size, minscore int, searchtext string) (mltquery map[string]interface{})
- func MakeNgramMap(text string, n int) (wordMap map[string]int)
- func MakeNgrams(text string, n int) (wordList []string)
- func MakeTempDir()
- func MapNgramKeys(nGramMap map[string]int) (keys []string)
- func MarshalJSONBillMeta(m *sync.Map) ([]byte, error)
- func MarshalJSONBillSimilarity(m *sync.Map) ([]byte, error)
- func MarshalJSONStringArray(m *sync.Map) ([]byte, error)
- func PathFromBillNumber(billNumber string) (string, error)
- func Prepend(filepath string, text string) error
- func PrependSlice(slice []string, val string) []string
- func PrintESInfo()
- func ReadLegislatorsYaml()
- func ReadToString(r io.Reader) string
- func RemoveDuplicates(elements []string) []string
- func RemoveIndex(slice []string, index int) []string
- func RemoveVal(slice []string, val string) []string
- func ReverseSlice(slice []string) []string
- func ReverseStrings(ss []string)
- func RunQuery(query map[string]interface{}) (r map[string]interface{})
- func SaveBillDataJson(billCongressTypeNumber string, dataJson []byte, parentPath string, ...) (savePath string, err error)
- func SaveBillJson(billCongressTypeNumber string, billMetaItem BillMeta, parentPath string) error
- func SaveBillJsonToDB(billCongressTypeNumber string, billMetaItem BillMeta) error
- func ScrollQueryBillNumbers(buf bytes.Buffer, resultChan chan []gjson.Result)
- func SortReasons(reasons []string) []string
- func UnmarshalJson(data []byte) (*sync.Map, error)
- func UnmarshalJsonFile(jpath string) (*sync.Map, error)
- func WalkDirFilter(root string, testPath FilterFunc) (filePaths []string, err error)
- func WriteBillMetaFile(billMeta interface{}, parentPath string) (saved bool)
- func WriteBillMetaFiles(billMetaSyncMap *sync.Map, parentPath string)
- func WriteRelatedDictFiles(billMetaSyncMap *sync.Map, parentPath string)
- type ActionItem
- type BillItemES
- type BillLevels
- type BillMeta
- type BillMetaDoc
- type BillScore
- type Committee
- type CommitteeItem
- type Committees
- type CompareItem
- type CosponsorItem
- type DataJson
- type FilterFunc
- type Hit_ES
- type Hits_ES
- type Id
- type InnerHit
- type InnerHitSections
- type InnerHits
- type Legislator
- type Legislators
- type LogLevel
- type LogLevels
- type RelatedBillItem
- type RelatedBillMap
- type ResultHits
- type ResultInnerHits
- type SearchResult_ES
- type SectionItem
- type SectionItemMeta
- type SimilarBillData
- type SimilarBillMap
- type SimilarBillMapBySection
- type SimilarSection
- type SimilarSections
- type SimilarSectionsItem
- type SimilarSectionsItems
- type Subcommittee
- type SummaryItem
- type TitlesJson
- type WordSample
Constants ¶
This section is empty.
Variables ¶
var ( BillnumberRegexCompiled = regexp.MustCompile(`(?P<congress>[1-9][0-9]*)(?P<stage>[a-z]{1,8})(?P<billnumber>[1-9][0-9]*)(?P<version>[a-z]+)?`) BillFileRegexCompiled = regexp.MustCompile(`BILLS-(?P<congress>[1-9][0-9]*)(?P<stage>[a-z]{1,8})(?P<billnumber>[1-9][0-9]*)(?P<version>[a-z]+)?(?:-uslm)?.xml`) // e.g. congress/data/117/bills/sconres/sconres2 DcTitle_Regexp = regexp.MustCompile(`<dc:title>(.*)?<`) // matches the title in the <dc:title> element UsCongressPathRegexCompiled = regexp.MustCompile(`data\/(?P<congress>[1-9][0-9]*)\/(?P<doctype>[a-z]+)\/(?P<stage>[a-z]{1,8})\/(?P<billnumber>[a-z]{1,8}[1-9][0-9]*)\/?(text-versions\/(?P<version>[a-z]+))?`) // matches strings of the form '...of 1979', where the year is a 4-digit number TitleNoYearRegexCompiled = regexp.MustCompile(`of\s[0-9]{4}$`) // Set to ../../congress PathToDataDir = path.Join("/", "data") ParentPathDefault = path.Join("..", "..", "..") CongressDir = "congress" BillMetaFile = "billMetaGo.json" BillSimilarityFile = "billSimilarityGo.json" TitleNoYearIndex = "titleNoYearIndexGo.json" MainTitleNoYearIndex = "mainTitleNoYearIndexGo.json" BillsFile = "billsGo.json" PathToCongressDataDir = path.Join(ParentPathDefault, CongressDir) BillMetaPath = path.Join(ParentPathDefault, BillMetaFile) BillSimilarityPath = path.Join(ParentPathDefault, BillSimilarityFile) TitleNoYearIndexPath = path.Join(ParentPathDefault, TitleNoYearIndex) MainTitleNoYearIndexPath = path.Join(ParentPathDefault, MainTitleNoYearIndex) BillsPath = path.Join(ParentPathDefault, BillsFile) BillMetaSyncMap = new(sync.Map) // titleSyncMap = new(sync.Map) MainTitleNoYearSyncMap = new(sync.Map) TitleNoYearSyncMap = new(sync.Map) MainTitleMatchReason = "bills-title_match_main" TitleMatchReason = "bills-title_match" IdentifiedByBillMap = "BillMap" BillVersionsOrdered = billVersions{"ih": 0, "rh": 1, "rfs": 2, "eh": 3, "es": 4, "enr": 5} ZLogLevels = LogLevels{"Debug": zerolog.DebugLevel, "Info": zerolog.InfoLevel, "Error": zerolog.ErrorLevel} 
)
Constants for this package
var ( GOVINFO_BASE_URL = "https://www.govinfo.gov/" COLLECTION_BASE_URL = GOVINFO_BASE_URL + "app/details/" BULKDATA_BASE_URL = GOVINFO_BASE_URL + "bulkdata/" COLLECTION_SITEMAPINDEX_PATTERN = GOVINFO_BASE_URL + "sitemap/{collection}_sitemap_index.xml" BULKDATA_SITEMAPINDEX_PATTERN = GOVINFO_BASE_URL + "sitemap/bulkdata/{collection}/sitemapindex.xml" FDSYS_BILLSTATUS_FILENAME = "fdsys_billstatus.xml" // for xpath NS = map[string]string{"x": "http://www.sitemaps.org/schemas/sitemap/0.9"} )
globals
var REASON_ORDER = map[string]int{"bills-identical": 1, "bills-nearly_identical": 2, "bills-title_match": 3, "bills-includes": 4, "bills-included_by": 5, "related": 6, "bills-some_similarity": 7, "bills-unrelated": 8}
Functions ¶
func BillIdToBillNumber ¶
Converts a bill_id of the form `hr299-116` into `116hr299`
func BillNumberFromPath ¶
Gets the bill number + version from the bill path. E.g. a bill_path of the form path/data/116/bills/hr/hr1500/text-versions/rh
returns 116hr1500rh
func BillNumberToBillId ¶
Converts a bill number of the form `116hr299` into `hr299-116`
func CollectWordSample ¶
func CollectWordSample(fpath string, wordSampleStorageChannel chan WordSample, wg *sync.WaitGroup) error
For each path to a data file, creates a random sample of tokenized words of length sampleFraction * number of tokenized words. Sends the result to a channel.
func CollectWordSamplesFromBills ¶
Collects a random sample of tokenized words for each bill in 'document.xml' files in the 'congress' directory. Writes the results to the wordSamplePath.
func CompareBills ¶
func CompareBills(parentPath string, billList []string, print bool) ([][]CompareItem, error)
func CompareBillsfromPaths ¶
func CompareBillsfromPaths(docPaths []string, print bool) ([][]CompareItem, error)
func CompareSamples ¶
func CompareSamples()
Compares a sample list of documents, defined in dOC_PATHS
func CopyFile ¶
Copies the src file to dst. Any existing file at dst will be overwritten; file attributes are not copied.
func CustomTokenize ¶
Tokenizer function that returns words longer than 3 characters which do not have certain punctuation. Currently: "-./(),!@#$%^&*:\\;"
func DownloadCommitteesYaml ¶
func DownloadFile ¶
DownloadFile will download a url to a local file. It's efficient because it will write as it downloads and not load the whole file into memory. See https://golangcode.com/download-a-file-from-a-url/
func DownloadLegislatorsYaml ¶
func ExtractBillMeta ¶
func ExtractBillMeta(billPath string, billMetaStorageChannel chan BillMeta, sem chan bool, wg *sync.WaitGroup) error
Extracts bill metadata from a path to a data.json file; sends it to the billMetaStorageChannel as part of a WaitGroup passed as wg
func Find ¶
Find takes a slice and looks for an element in it. If found it will return its index and a bool of true; otherwise it will return -1 and a bool of false.
func FindNamedMatches ¶
Returns a map of regex capture groups to the items that are matched
func GetAllBillNumbers ¶
func GetAllBillNumbers() []string
Gets all ids, which includes bill and version
func GetBill_ES ¶
func GetCompareMap ¶
func GetCompareMap(compareRow []CompareItem) (compareMap map[string]CompareItem)
func GetCongressIdQuery ¶
func GetKeysFromMap ¶
func GetMatchingBillNumberVersions ¶
func GetMatchingBillNumberVersions(results SearchResult_ES) (billnumberversions []string)
func GetMatchingBills ¶
func GetMatchingBills(results SearchResult_ES) (billnumbers []string)
func GetMoreLikeThis_ES ¶
func GetSampleBillNumbers ¶
func GetSampleBillNumbers() []string
func GetSimilarBillsDict ¶
func GetSimilarBillsDict(similarSectionsItems SimilarSectionsItems, maxBills int) (similarBillsDict map[string]SimilarSections)
func ListDataJsonFiles ¶
Walk 'congress' directory and get filepaths to 'data.json' which contains metadata for the bill
func ListDocumentXMLFiles ¶
Walk 'congress' directory and get filepaths to 'document.xml' which contains the bill xml
func MakeBillQuery ¶
func MakeBillsMeta ¶
func MakeBillsMeta(parentPath string)
Walks the 'congress' directory. Creates three metadata files: bills, titlesJson and billMeta. bills is the list of bill numbers (billCongressTypeNumber); titlesJson is a list of titles (no year); billMeta collects metadata from data.json files.
func MakeMLTQuery ¶
func MakeNgramMap ¶
Creates a map with n-grams as keys and number of occurrences as values. n is the number of words in each n-gram.
func MakeNgrams ¶
Creates a list of n-grams. First makes a map with 'MakeNgramMap', then returns a list of the keys of the map.
func MapNgramKeys ¶
Returns the keys of a map of type map[string]int
func MarshalJSONBillMeta ¶
Marshals a sync.Map object of the type map[string]BillMeta. See https://stackoverflow.com/a/46390611/628748 and https://stackoverflow.com/a/65442862/628748
func MarshalJSONStringArray ¶
Marshals a sync.Map object of the type map[string][]string. See https://stackoverflow.com/a/46390611/628748 and https://stackoverflow.com/a/65442862/628748
func PathFromBillNumber ¶
Gets the bill path from the billnumber + version. E.g. a billnumber of the form 116hr1500rh returns path/116/bills/hr/hr1500/text-versions/rh
func PrependSlice ¶
func PrintESInfo ¶
func PrintESInfo()
func ReadLegislatorsYaml ¶
func ReadLegislatorsYaml()
func ReadToString ¶
func RemoveDuplicates ¶
Removes duplicates in a list of strings. Returns the deduplicated list. Trims leading and trailing space for each element.
func RemoveIndex ¶
func ReverseStrings ¶
func ReverseStrings(ss []string)
func SaveBillDataJson ¶
func SaveBillDataJson(billCongressTypeNumber string, dataJson []byte, parentPath string, fileName string) (savePath string, err error)
Saves Data in JSON to bill directory
func SaveBillJson ¶
Saves bill metadata to billMeta.json
func SaveBillJsonToDB ¶
Saves bill metadata to db (badger or bolt) via bh
func ScrollQueryBillNumbers ¶
Performs a scroll query over indices in `searchIndices`; sends the result to the resultChan for processing to extract billnumbers. See https://github.com/elastic/go-elasticsearch/issues/44#issuecomment-483974031
func SortReasons ¶
func UnmarshalJson ¶
Unmarshals from JSON to a sync.Map. See https://stackoverflow.com/a/65442862/628748
func WalkDirFilter ¶
func WalkDirFilter(root string, testPath FilterFunc) (filePaths []string, err error)
Walk directory with a filter. Returns the filepaths that pass the 'testPath' function
func WriteBillMetaFile ¶
TODO: return saved path
func WriteBillMetaFiles ¶
func WriteRelatedDictFiles ¶
Types ¶
type ActionItem ¶
type BillItemES ¶
type BillItemES struct { ID string `json:"id"` BillNumber string `json:"billnumber"` BillVersion string `json:"billversion"` Congress string `json:"congress"` Session string `json:"session"` Date string `json:"date"` DC []string `json:"dc"` DCTitle string `json:"dctitle"` Headers []string `json:"headers"` Legisnum string `json:"legisnum"` Sections []SectionItem `json:"sections"` }
func BillResultToStruct ¶
func BillResultToStruct(billresult map[string]interface{}) (billItemResult BillItemES, err error)
func GetLatestBill ¶
func GetLatestBill(r map[string]interface{}) (latestbill BillItemES, err error)
Sort the eh, es, and enr as latest. Then sort by date. TODO: a better method is to get the latest version in Fdsys_billstatus
type BillLevels ¶
func ParseBill ¶
func ParseBill(sampleFilePath string) (parsedBill BillLevels)
type BillMeta ¶
type BillMeta struct { Actions []ActionItem `json:"actions"` Congress string `json:"congress"` BillType string `json:"bill_type"` Number string `json:"number"` BillCongressTypeNumber string `json:"bill_congress_type_number"` History interface{} `json:"history"` OfficialTitle string `json:"official_title"` PopularTitle string `json:"popular_title"` ShortTitle string `json:"short_title"` Titles []string `json:"titles"` TitlesWholeBill []string `json:"titles_whole_bill"` Cosponsors []CosponsorItem `json:"cosponsors"` Committees []CommitteeItem `json:"committees"` RelatedBills []RelatedBillItem `json:"related_bills"` RelatedBillsByBillnumber RelatedBillMap `json:"related_dict"` }
func MakeBillMeta ¶
type BillMetaDoc ¶
type BillScore ¶
func GetSimilarBills ¶
func GetSimilarBills(similarBillMapBySection SimilarBillMapBySection) (billScores []BillScore)
type Committee ¶
type Committee struct { Type string `yaml:"type,omitempty,flow"` Name string `yaml:"name,omitempty,flow"` Url string `yaml:"url,omitempty,flow"` MinorityUrl string `yaml:"minority_url,omitempty,flow"` ThomasId string `yaml:"thomas_id,omitempty,flow"` HouseCommitteeId string `yaml:"house_committee_id,omitempty,flow"` Subcommittees []Subcommittee `yaml:"subcommittees,omitempty,flow"` Address string `yaml:"address,omitempty,flow"` Phone string `yaml:"phone,omitempty,flow"` RssUrl string `yaml:"rss_url,omitempty,flow"` Jurisdiction string `yaml:"jurisdiction,omitempty,flow"` }
type CommitteeItem ¶
type Committees ¶
type Committees struct {
Committees []Committee `yaml:"committees,omitempty,flow"`
}
func ReadCommitteesYaml ¶
func ReadCommitteesYaml() (committees Committees, err error)
func (*Committees) ParseCommitteeYaml ¶
func (c *Committees) ParseCommitteeYaml(data []byte) error
type CompareItem ¶
type CosponsorItem ¶
type CosponsorItem struct { BioguideId string `json:"bioguide_id"` ThomasId string `json:"thomas_id"` // Type string `json:"type"` District string `json:"district"` Name string `json:"name"` OriginalCosponsor bool `json:"original_cosponsor"` SponsoredAt string `json:"sponsored_at"` State string `json:"state"` Title string `json:"title"` }
type DataJson ¶
type DataJson struct { Actions []ActionItem `json:"actions"` Amendments []interface{} `json:"amendments"` BillId string `json:"bill_id"` BillType string `json:"bill_type"` ByRequest bool `json:"by_request"` CommitteeReports []interface{} `json:"committee_reports"` Committees []CommitteeItem `json:"committees"` Congress string `json:"congress"` Cosponsors []CosponsorItem `json:"cosponsors"` EnactedAs string `json:"enacted_as"` History interface{} `json:"history"` IntroducedAt string `json:"introduced_at"` Number string `json:"number"` OfficialTitle string `json:"official_title"` PopularTitle string `json:"popular_title"` RelatedBills []RelatedBillItem `json:"related_bills"` ShortTitle string `json:"short_title"` Sponsor string `json:"sponsor"` Status string `json:"status"` StatusAt string `json:"status_at"` Subjects []interface{} `json:"subjects"` SubjectsTopTerm string `json:"subjects_top_term"` Summary SummaryItem `json:"summary"` Titles []TitlesJson `json:"titles"` UpdatedAt string `json:"updated_at"` Url string `json:"url"` }
type FilterFunc ¶
type Hit_ES ¶
type Hits_ES ¶
type Hits_ES []Hit_ES
func GetHitsES ¶
func GetHitsES(results SearchResult_ES) (innerHits Hits_ES, err error)
type Id ¶
type Id struct {
Id []Legislator `yaml:"id,omitempty,flow"`
}
type InnerHitSections ¶
type InnerHitSections struct { Hits struct { Hits []struct { SectionHit struct { ID string `json:"_id"` Index string `json:"_index"` Nested struct { Field string `json:"field"` Offset int `json:"offset"` } `json:"_nested"` Score string `json:"_score"` Source struct { SectionNumber string `json:"section_number"` SectionHeader string `json:"section_header"` SectionText string `json:"section_text"` } `json:"_source"` } } `json:"hits"` } `json:"hits"` }
type InnerHits ¶
type InnerHits struct { Sections struct { Hits struct { Hits []InnerHit MaxScore float32 `json:"max_score"` Total struct { Relation string `json:"relation"` Value int `json:"value"` } `json:"total"` } `json:"hits"` } `json:"sections"` }
func GetInnerHits ¶
func GetInnerHits(results SearchResult_ES) (innerHits []InnerHits, err error)
type Legislator ¶
type Legislator struct { Bioguide string `yaml:"bioguide,omitempty,flow"` Thomas string `yaml:"thomas,omitempty,flow"` Lis string `yaml:"lis,omitempty,flow"` Govtrack string `yaml:"govtrack,omitempty,flow"` Opensecrets string `yaml:"opensecrets,omitempty,flow"` Votesmart string `yaml:"votesmart,omitempty,flow"` Fec []string `yaml:"fec,omitempty,flow"` Cspan string `yaml:"cspan,omitempty,flow"` Wikipedia string `yaml:"wikipedia,omitempty,flow"` HouseHistory string `yaml:"house_history,omitempty,flow"` Ballotpedia string `yaml:"ballotpedia,omitempty,flow"` Maplight string `yaml:"maplight,omitempty,flow"` Icpsr string `yaml:"icpsr,omitempty,flow"` Wikidata string `yaml:"wikidata,omitempty,flow"` GoogleEntityId string `yaml:"google_entity_id,omitempty,flow"` Name struct { First string `yaml:"first,omitempty,flow"` Last string `yaml:"last,omitempty,flow"` OfficialFull string `yaml:"official_full,omitempty,flow"` } `yaml:"name,omitempty,flow"` Bio struct { Birthday string `yaml:"birthday,omitempty,flow"` Gender string `yaml:"gender,omitempty,flow"` } Terms []struct { Type string `yaml:"type,omitempty,flow"` Start string `yaml:"Start,omitempty,flow"` End string `yaml:"End,omitempty,flow"` State string `yaml:"State,omitempty,flow"` District string `yaml:"District,omitempty,flow"` Party string `yaml:"Party,omitempty,flow"` StateRank string `yaml:"state_rank,omitempty,flow"` Url string `yaml:"url,omitempty,flow"` RssUrl string `yaml:"rss_url,omitempty,flow"` ContactForm string `yaml:"contact_form,omitempty,flow"` Address string `yaml:"address,omitempty,flow"` Office string `yaml:"office,omitempty,flow"` Phone string `yaml:"phone,omitempty,flow"` } }
type Legislators ¶
type Legislators struct {
Legislators []Legislator `yaml:"legislators,omitempty,flow"`
}
func (*Legislators) ParseLegislatorsYaml ¶
func (c *Legislators) ParseLegislatorsYaml(data []byte) error
type RelatedBillItem ¶
type RelatedBillItem struct { BillId string `json:"bill_id"` IdentifiedBy string `json:"identified_by"` Reason string `json:"reason"` Type string `json:"type"` BillCongressTypeNumber string `json:"bill_congress_type_number"` //Sponsor CosponsorItem `json:"sponsor"` //Cosponsors []CosponsorItem `json:"cosponsors"` Titles []string `json:"titles"` TitlesWholeBill []string `json:"titles_whole_bill"` }
type RelatedBillMap ¶
type RelatedBillMap map[string]RelatedBillItem
type ResultHits ¶
type ResultHits struct { MaxScore float32 `json:"max_score"` Total struct { Relation string `json:"relation"` Value int `json:"value"` } `json:"total"` Hits Hits_ES `json:"hits"` }
ResultHits represents the result of the search hits
type ResultInnerHits ¶
type ResultInnerHits []struct { Index string `json:"_index"` Type string `json:"_type"` ID string `json:"_id"` Score float32 `json:"_score"` Source json.RawMessage `json:"_source"` //Highlight map[string][]string `json:"highlight,omitempty"` Sections InnerHitSections `json:"sections"` }
type SearchResult_ES ¶
type SearchResult_ES struct { Took uint64 `json:"took"` TimedOut bool `json:"timed_out"` Shards struct { Total int `json:"total"` Successful int `json:"successful"` Failed int `json:"failed"` Skipped int `json:"skipped"` } `json:"_shards"` Hits ResultHits `json:"hits"` }
SearchResult_ES represents the result of the search operation
func GetMLTResult ¶
func GetMLTResult(size, minscore int, searchtext string) (esResult SearchResult_ES, err error)
type SectionItem ¶
type SectionItem struct { BillNumber string `json:"bill_number"` BillNumberVersion string `json:"bill_number_version"` SectionIndex string `json:"sectionIndex"` SectionNumber string `json:"section_number"` SectionHeader string `json:"section_header"` SectionText string `json:"section_text"` SectionXML string `json:"section_xml"` }
type SectionItemMeta ¶
type SimilarBillData ¶
type SimilarBillData struct { TopSectionIndex string TopSectionHeader string TopSectionNum string TopSectionScore float32 TotalScore float32 TotalSimilarSections int SectionItemMetaMap map[SectionItemMeta]SimilarSection }
type SimilarBillMap ¶
type SimilarBillMap map[string]SimilarSections
type SimilarBillMapBySection ¶
type SimilarBillMapBySection map[string]SimilarBillData
func GetSimilarityBillMapBySection ¶
func GetSimilarityBillMapBySection(billItem BillItemES, sampleSize int) (similarBillMapBySection SimilarBillMapBySection)
func SimilarSectionsItemsToBillMap ¶
func SimilarSectionsItemsToBillMap(similarSectionsItems SimilarSectionsItems) (similarBillMapBySection SimilarBillMapBySection)
type SimilarSection ¶
type SimilarSection struct { Date string `json:"date"` Score float32 `json:"score"` Title string `json:"title"` Session string `json:"session"` Congress string `json:"congress"` Legisnum string `json:"legisnum"` Billnumber string `json:"billnumber"` SectionNum string `json:"section_num"` SectionIndex string `json:"sectionIndex"` SectionHeader string `json:"section_header"` BillCongressTypeNumberVersion string `json:"bill_number_version"` TargetSectionHeader string `json:"target_section_header"` // This is the section header of the original TargetSectionNumber string `json:"target_section_number"` // This is the section number of the original TargetSectionIndex string `json:"target_section_index"` // This is the section index of the original }
This is the form of item in `es_similar_bills_dict`; for each billnumber (e.g. '116hr238'), it collects the best scoring sections
type SimilarSections ¶
type SimilarSections []SimilarSection
func GetSimilarSections ¶
func GetSimilarSections(results SearchResult_ES, queryItem SectionItem) (similarSections SimilarSections, err error)
results is the result of the MLT query
type SimilarSectionsItem ¶
type SimilarSectionsItem struct { BillNumber string `json:"bill_number"` // Original (target) bill BillNumberVersion string `json:"bill_number_version"` // Original (target) bill version SectionHeader string `json:"section_header"` // Original (target) section header SectionNum string `json:"section"` // Original (target) section number SectionIndex string `json:"sectionIndex"` // Original (target) section index SimilarSections SimilarSections `json:"similar_sections"` // list of similar sections SimilarBills []string `json:"similar_bills"` // deduplicated list of billnumbers from highest to lowest score SimilarBillNumberVersions []string `json:"similar_bill_number_versions"` // deduplicated list of billnumberversions from highest to lowest score }
func SectionItemQuery ¶
func SectionItemQuery(sectionItem SectionItem) (similarSectionsItem SimilarSectionsItem)
type SimilarSectionsItems ¶
type SimilarSectionsItems []SimilarSectionsItem
func GetSimilaritySectionsByBillNumber ¶
func GetSimilaritySectionsByBillNumber(billItem BillItemES, samplesize int) (similarSectionsItems SimilarSectionsItems)
Set sample size to <= 0 to use all sections