Documentation ¶
Index ¶
Constants ¶
const (
	// FRENCH_FORMAT is the day/month/year date layout; DateOfBirth uses it
	// as its default output format.
	FRENCH_FORMAT = "DD/MM/YYYY"
	// ISO_FORMAT is the compact year-month-day date layout; DateOfBirth
	// uses it as its default input format.
	ISO_FORMAT = "YYYYMMDD"
	// TIMESTAMP is the input-format keyword telling DateOfBirth that the
	// input is a Unix timestamp expressed in seconds.
	TIMESTAMP = "timestamp"
	// TIMESTAMP_MILLIS is the input-format keyword telling DateOfBirth that
	// the input is a Unix timestamp expressed in milliseconds.
	TIMESTAMP_MILLIS = "timestamp_millis"
)
const (
	// CODE_F is the title code returned by Title for female.
	CODE_F = "2"
	// CODE_M is the title code returned by Title for male.
	CODE_M = "1"
	// CODE_U is the title code returned by Title for unknown or undefined.
	CODE_U = "0"
)
Variables ¶
This section is empty.
Functions ¶
func Normalize ¶
Normalize is the main function for the adaptation of the normalizing process developed for the Empreinte Sociometrique™ by Edgewhere: it takes the data to normalize and the normalizing function to use as well as some optional parameters, and returns the normalized string and an error if any.
func Uniformize ¶
Uniformize applies the basic normalizing process: trim, capitalize, ...
Types ¶
type Dictionary ¶
Dictionary ...
func (Dictionary) TranslateText ¶
func (d Dictionary) TranslateText(input string) string
TranslateText returns the passed string with all its words passed through the TranslateWord function. The input should have been uniformized beforehand.
func (Dictionary) TranslateWord ¶
func (d Dictionary) TranslateWord(input string) string
TranslateWord returns the translated word if found in the dictionary, the original string otherwise. The input should have been uniformized beforehand.
type SimpleNormalizer ¶
SimpleNormalizer ...
var AddressLine SimpleNormalizer = func(input string) (string, error) { if len(input) == 0 { return "", errors.New("invalid empty string") } found := addressDictionary.TranslateText(Uniformize(input)) if found == "" { return "", errors.New("unable to build a normalized string") } return found, nil }
AddressLine returns a normalized address line (for lines 2 to 6 in the French postal convention, but not only). TODO: become international in the address.dico file
var Any SimpleNormalizer = func(input string) (string, error) { uniformized := Uniformize(input) if uniformized == "" && input != "" { return "", errors.New("unable to normalize input string") } return uniformized, nil }
Any is the normalizing function to use as normalizer argument for any kind of data when no specific normalizer exists
var City SimpleNormalizer = func(input string) (string, error) { if len(input) == 0 { return "", errors.New("invalid empty string") } found := addressDictionary.TranslateText(Uniformize(input)) if found == "" { return "", errors.New("unable to build a normalized string") } spaces := regexp.MustCompile(`\s+`) found = spaces.ReplaceAllString(reCity.ReplaceAllString(found, ""), " ") return strings.TrimSpace(found), nil }
City returns a normalized city name
var CodePostalFrance SimpleNormalizer = func(input string) (string, error) { uniformized := Uniformize(input) if !reCPF.MatchString(uniformized) { return "", errors.New("invalid code postal") } if reCorse.MatchString(uniformized) { uniformized = strings.NewReplacer("A", "0", "B", "0").Replace(uniformized) } return uniformized, nil }
CodePostalFrance returns a normalized French zip code
var DepartementFrance SimpleNormalizer = func(input string) (string, error) { uniformized := Uniformize(input) matches := reDF.FindAllStringSubmatch(uniformized, 2) if len(matches) == 0 || len(matches[0]) != 3 { return "", errors.New("not a valid departement") } dpt := matches[0][1] if dpt == "20" { if cp, e := CodePostalFrance(input); e == nil { cpInt, _ := strconv.ParseInt(cp, 10, 64) if cpInt > 19999 && cpInt < 20200 { dpt = "2A" } else if cpInt > 20199 && cpInt < 20621 { dpt = "2B" } } } return dpt, nil }
DepartementFrance returns the code of a French "département" (e.g. `2A` or `2B` for Corsica)
var Email SimpleNormalizer = func(input string) (string, error) { processed := strings.ToLower(strings.TrimSpace(input)) if !IsValidEmail(processed) { return "", errors.New("invalid email") } return processed, nil }
Email returns a normalized e-mail
var FirstName SimpleNormalizer = func(input string) (string, error) { if len(input) == 0 { return "", errors.New("invalid empty string") } found := firstNameDictionary.TranslateWord(Uniformize(input)) if found == "" { return "", errors.New("unable to build a normalized string") } return found, nil }
FirstName returns a normalized first name
var Mobile SimpleNormalizer = func(input string) (string, error) { matches := reMob.FindStringSubmatch(strings.ReplaceAll(Uniformize(input), " ", "")) if len(matches) < 1 { return "", errors.New("invalid mobile string") } var parts = make([]string, 5) var p1, p2, p3, p4, p5 bool mob := "" for i, v := range matches { if i == 4 { international := "+" if len(v) > 0 { international += v } else { international += "33" } parts[0] = international p1 = true } if i == 5 { prefix := "(" if len(v) > 0 { prefix += v } else { prefix += "0" } prefix += ")" parts[1] = prefix p2 = true } if i == 6 { if v == "6" || v == "7" { mob = v } else { return "", errors.New("not a mobile phone") } } if i == 7 && mob != "" && v != "" { parts[2] = mob + v p3 = true } if i == 8 && v != "" { parts[3] = v p4 = true } if i == 9 && v != "" { parts[4] = v p5 = true } } if !p1 || !p2 || !p3 || !p4 || !p5 { return "", errors.New("unable to build normalized mobile") } return strings.Join(parts, " "), nil }
Mobile returns a normalized mobile, or an empty string and an error if failed. WARNING: The current implementation is specific to French mobile phones TODO Become international
var PhoneNumber SimpleNormalizer = func(input string) (string, error) { matches := reTel.FindStringSubmatch(strings.ReplaceAll(Uniformize(input), " ", "")) if len(matches) < 1 { return "", errors.New("invalid phone number") } var parts = make([]string, 5) var p1, p2, p3, p4, p5 bool for i, v := range matches { switch i { case 4: { international := "+" if len(v) > 0 { international += v } else { international += "33" } parts[0] = international p1 = true } case 9: { prefix := "(" if len(v) > 0 { prefix += v } else { prefix += "0" } prefix += ")" parts[1] = prefix p2 = true } case 11: if v != "" { parts[2] = v p3 = true } case 12: if v != "" { parts[3] = v p4 = true } case 13: if v != "" { parts[4] = v p5 = true } default: } } if !p1 || !p2 || !p3 || !p4 || !p5 { return "", errors.New("unable to build normalized phone number") } return strings.Join(parts, " "), nil }
PhoneNumber returns a normalized landline phone number, or an empty string and an error if failed. WARNING: The current implementation is specific to French phone numbers TODO Become international
var StreetNumber SimpleNormalizer = func(input string) (string, error) { matches := reSN.FindAllStringSubmatch(strings.TrimSpace(input), 2) if len(matches) == 0 || len(matches[0]) != 3 { return "", errors.New("probably not a street number") } num := matches[0][1] compUni := "" if matches[0][2] != "" { comp := Uniformize(matches[0][2]) if comp != "" { compUni = addressDictionary.TranslateText(comp) } } return num + compUni, nil }
StreetNumber returns a sanitized street number
var Title SimpleNormalizer = func(input string) (string, error) { if len(input) == 0 { return "", errors.New("invalid empty string") } uniformized := Uniformize(input) if uniformized == "M" || uniformized == "H" { return CODE_M, nil } if uniformized == "F" { return CODE_F, nil } if uniformized == "U" { return CODE_U, nil } found := reCiv.FindAllString(titleDictionary.TranslateText(uniformized), 2) if len(found) != 1 { if uniformized == "" { return "", errors.New("unable to build a normalized string") } return CODE_U, nil } return found[0], nil }
Title returns a code string: `0` for undefined, `1` for male, `2` for female
type VariadicNormalizer ¶
VariadicNormalizer ...
var DateOfBirth VariadicNormalizer = func(input string, params ...string) (string, error) { input = strings.TrimSpace(input) inputFormat := ISO_FORMAT if len(params) > 0 { switch strings.ToLower(params[0]) { case TIMESTAMP: inputFormat = TIMESTAMP case TIMESTAMP_MILLIS: inputFormat = TIMESTAMP_MILLIS default: inputFormat = strings.ToUpper(params[0]) } } outputFormat := FRENCH_FORMAT if len(params) > 1 { outputFormat = strings.ToUpper(params[1]) } var d time.Time if inputFormat == TIMESTAMP { i, e := strconv.ParseInt(input, 10, 64) if e != nil { return "", e } d = time.Unix(i, 0) } else if inputFormat == TIMESTAMP_MILLIS { millis := len(input) - 3 if millis < 0 { return "", errors.New("invalid timestamp in milliseconds") } nanos, e := strconv.ParseInt(input[millis:], 10, 64) if e != nil { return "", e } secs, e := strconv.ParseInt(input[:millis], 10, 64) if e != nil { return "", e } d = time.Unix(secs, nanos) } else { parsed, e := fmtdate.Parse(inputFormat, input) if e != nil { return "", e } d = parsed } return fmtdate.Format(outputFormat, d), nil }
DateOfBirth returns a normalized date using the `params` arguments, the latter being a list of optional arguments to use to format the output appropriately: - the first item is the string format of the input string (defaults to ISO format: `YYYYMMDD`); - the second item is the string format for the output (defaults to French date: `DD/MM/YYYY`). The input format could be a `timestamp` or a `timestamp_millis`.