matcher

package
v0.0.0-...-c9cb955 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jun 21, 2024 License: MIT Imports: 15 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CalculateBinaryKey

func CalculateBinaryKey(referenceEntities []string, street string) string

CalculateBinaryKey calculates the binary key for a given street address.

func ClearOldCandidates

func ClearOldCandidates(pool *pgxpool.Pool, runID int)

func CreateNewRun

func CreateNewRun(pool *pgxpool.Pool, description string) int

func GenerateEmbeddingsPythonScript

func GenerateEmbeddingsPythonScript(scriptPath string, runID int) error

GenerateEmbeddingsPythonScript runs the Python script to generate embeddings.

func GenerateTFIDF

func GenerateTFIDF(pool *pgxpool.Pool, runID int)

GenerateTFIDF generates TF/IDF vectors and inserts them into the database.

func InsertBatch

func InsertBatch(pool *pgxpool.Pool, batch [][2]interface{}, runID int)

InsertBatch inserts a batch of results into the database

func InsertFromLoadTable

func InsertFromLoadTable(pool *pgxpool.Pool, runID int) error

InsertFromLoadTable inserts records from the load_table into customer_matching

func IsNumeric

func IsNumeric(s string) bool

IsNumeric checks if a string contains only numeric characters

func LoadReferenceEntities

func LoadReferenceEntities(pool *pgxpool.Pool) []string

LoadReferenceEntities loads reference entities into memory.

func LoadSQLQuery

func LoadSQLQuery(filepath string) (string, error)

LoadSQLQuery loads an SQL query from a file

func ProcessCustomerAddresses

func ProcessCustomerAddresses(pool *pgxpool.Pool, referenceEntities []string, numWorkers int, runID int)

ProcessCustomerAddresses processes customer addresses and generates binary keys

func ProcessSingleRecord

func ProcessSingleRecord(pool *pgxpool.Pool, req MatchRequest) error

ProcessSingleRecord processes a single record and inserts it into the database

func StandardizeAddress

func StandardizeAddress(street string) (string, error)

StandardizeAddress takes a raw address string and returns a standardized address string.

func TruncateBatchMatchTable

func TruncateBatchMatchTable(pool *pgxpool.Pool) error

TruncateBatchMatchTable truncates the batch_match table

Types

type Candidate

// Candidate represents a potential match: one input record paired with one
// candidate record, together with the scores attached to that pairing.
// Each field is serialized to JSON under the snake_case key in its tag.
type Candidate struct {
	// Input* fields carry the input side of the pair (IDs, name, address, phone).
	InputCustomerID          int     `json:"input_customer_id"`
	InputRunID               int     `json:"input_run_id"`
	InputFirstName           string  `json:"input_first_name"`
	InputLastName            string  `json:"input_last_name"`
	InputStreet              string  `json:"input_street"`
	InputCity                string  `json:"input_city"`
	InputState               string  `json:"input_state"`
	InputZipCode             string  `json:"input_zip_code"`
	InputPhoneNumber         string  `json:"input_phone_number"`
	// Candidate* fields mirror the Input* fields for the candidate side of the pair.
	CandidateCustomerID      int     `json:"candidate_customer_id"`
	CandidateRunID           int     `json:"candidate_run_id"`
	CandidateFirstName       string  `json:"candidate_first_name"`
	CandidateLastName        string  `json:"candidate_last_name"`
	CandidateStreet          string  `json:"candidate_street"`
	CandidateCity            string  `json:"candidate_city"`
	CandidateState           string  `json:"candidate_state"`
	CandidateZipCode         string  `json:"candidate_zip_code"`
	CandidatePhoneNumber     string  `json:"candidate_phone_number"`
	// Pair-level scoring fields.
	// NOTE(review): how these are computed, their value ranges, and which one is
	// the "composite score" mentioned for FindPotentialMatches are not visible
	// here — presumably Score; confirm against the scoring query.
	Similarity               float64 `json:"similarity"`
	BinKeyMatch              bool    `json:"bin_key_match"`
	TfidfScore               float64 `json:"tfidf_score"`
	Rank                     int     `json:"rank"`
	Score                    float64 `json:"score"`
	// Per-attribute trigram cosine values; no State counterpart is declared.
	// NOTE(review): presumably similarity between the Input* and Candidate*
	// value of each attribute — confirm.
	TrigramCosineFirstName   float64 `json:"trigram_cosine_first_name"`
	TrigramCosineLastName    float64 `json:"trigram_cosine_last_name"`
	TrigramCosineStreet      float64 `json:"trigram_cosine_street"`
	TrigramCosineCity        float64 `json:"trigram_cosine_city"`
	TrigramCosinePhoneNumber float64 `json:"trigram_cosine_phone_number"`
	TrigramCosineZipCode     float64 `json:"trigram_cosine_zip_code"`
}

Candidate represents a potential match

func FindPotentialMatches

func FindPotentialMatches(pool *pgxpool.Pool, runID int, topN int) ([]Candidate, error)

FindPotentialMatches finds potential matches and scores them based on composite score

type Config

// Config mirrors the YAML configuration layout described by its field tags.
// It currently declares a single section, stored under the "db_creds" key.
type Config struct {
	// DBCreds groups the database settings. All values, including Port, are
	// declared as strings.
	DBCreds struct {
		Host      string `yaml:"host"`
		Port      string `yaml:"port"`
		Username  string `yaml:"username"`
		Password  string `yaml:"password"`
		Database  string `yaml:"database"`
		// NOTE(review): presumably the name of the table that
		// InsertFromLoadTable reads from — confirm against its implementation.
		LoadTable string `yaml:"load_table"`
	} `yaml:"db_creds"`
}

func LoadConfig

func LoadConfig(configPath string) (*Config, error)

type MatchRequest

// MatchRequest represents a matching request. Each field is exchanged as JSON
// under the snake_case key in its tag.
type MatchRequest struct {
	// Name, phone and address attributes of the record to match.
	FirstName   string `json:"first_name"`
	LastName    string `json:"last_name"`
	PhoneNumber string `json:"phone_number"`
	Street      string `json:"street"`
	City        string `json:"city"`
	State       string `json:"state"`
	ZipCode     string `json:"zip_code"`
	// Request parameters rather than record data.
	// NOTE(review): presumably TopN and RunID feed FindPotentialMatches and
	// ScriptPath feeds GenerateEmbeddingsPythonScript — confirm against the
	// code that consumes the request.
	TopN        int    `json:"top_n"`
	RunID       int    `json:"run_id"`
	ScriptPath  string `json:"script_path"`
}

MatchRequest represents a matching request

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL