lsdp

package module
v1.4.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 9, 2018 License: MIT Imports: 0 Imported by: 2

README

go-lsd-parametrized

Weighted Levenshtein distance and its extended interfaces, written in Go.

godoc

Installation

go get -u github.com/deltam/go-lsd-parametrized

Usage

package main

import (
    "fmt"

    "github.com/deltam/go-lsd-parametrized"
)

// main shows the basic ways of measuring the distance between two strings:
// the standard Levenshtein distance, a weighted distance, a weighted and
// normalized distance, and a distance with per-rune costs.
func main() {
    a, b := "kitten", "shitting"

    // standard
    fmt.Println(lsdp.Lsd(a, b))
    // Output:
    // 4

    // weighted
    wd := lsdp.Weights{Insert: 0.1, Delete: 1, Replace: 0.01}
    fmt.Println(wd.Distance(a, b))
    // Output:
    // 0.22

    // weighted and normalized
    nd := lsdp.Normalized(wd)
    fmt.Println(nd.Distance(a, b))
    // Output:
    // 0.0275

    // weighted by rune
    // The base weights use keyed fields so the literal stays unambiguous and
    // passes go vet's check for unkeyed literals of imported structs.
    wr := lsdp.ByRune(&lsdp.Weights{Insert: 1, Delete: 1, Replace: 1}).
        Insert("g", 0.1).
        Insert("h", 0.01).
        Replace("k", "s", 0.001).
        Replace("e", "i", 0.0001)
    fmt.Println(wr.Distance(a, b))
    // Output:
    // 0.1111
}

Operators

// main shows the helper operators that apply one distance measurer to a
// list of candidate strings.
func main() {
    // Keyed fields keep the literal unambiguous and clean under go vet.
    std := lsdp.Weights{Insert: 1, Delete: 1, Replace: 1}
    fruits := []string{"apple", "orange", "lemon", "water melon"}

    // find nearest string
    s, d := lsdp.Nearest(std, "aple", fruits)
    fmt.Println(s, d)
    // Output:
    // apple 1

    // calculate the distance to each string
    ds := lsdp.DistanceAll(std, "aple", fruits)
    fmt.Println(ds)
    // Output:
    // [1 4 5 9]
}

Custom Distance

// lenDiff measures how far apart two strings are by length alone: it returns
// the absolute difference between their rune counts (not their byte lengths).
func lenDiff(a, b string) float64 {
    runesA := utf8.RuneCountInString(a)
    runesB := utf8.RuneCountInString(b)
    return math.Abs(float64(runesA - runesB))
}

// main adapts the plain function lenDiff to a distance measurer and uses it
// both directly and through lsdp.Nearest.
func main() {
    measure := lsdp.DistanceFunc(lenDiff)
    fmt.Println(measure.Distance("kitten", "shitting"))
    // Output:
    // 2

    candidates := []string{"", "a", "ab", "abc"}
    nearest, distance := lsdp.Nearest(measure, "xx", candidates)
    fmt.Println(nearest, distance)
    // Output:
    // ab 0
}

Composite two Distances

// Far combines two distance measurers into one whose Distance is the larger
// of the two underlying distances.
func Far(dm1, dm2 lsdp.DistanceMeasurer) lsdp.DistanceMeasurer {
    composite := new(far)
    composite.dm1, composite.dm2 = dm1, dm2
    return composite
}

// far holds the two distance measurers whose results are compared.
type far struct {
    dm1 lsdp.DistanceMeasurer
    dm2 lsdp.DistanceMeasurer
}

// Distance measures a and b with both wrapped measurers and returns the
// larger result; when the first is not strictly greater, the second wins.
func (f *far) Distance(a, b string) float64 {
    first, second := f.dm1.Distance(a, b), f.dm2.Distance(a, b)
    switch {
    case first > second:
        return first
    default:
        return second
    }
}

// main compares a uniform and a skewed weighting on the same pair of strings
// and then shows that the Far composite reports the larger of the two.
func main() {
    const src, dst = "kitten", "shitting"

    uniform := lsdp.Weights{Insert: 1, Delete: 1, Replace: 1}
    skewed := lsdp.Weights{Insert: 10, Delete: 1, Replace: 0.1}
    farther := Far(uniform, skewed)

    fmt.Println(uniform.Distance(src, dst))
    // Output:
    // 4

    fmt.Println(skewed.Distance(src, dst))
    // Output:
    // 20.2

    fmt.Println(farther.Distance(src, dst))
    // Output:
    // 20.2
}

Use Case

  • Clustering error messages

License

MIT License

Documentation

Overview

Package lsdp provides Weighted Levenshtein distance and its extended interface

Index

Examples

Constants

This section is empty.

Variables

This section is empty.

Functions

func DistanceAll

func DistanceAll(dm DistanceMeasurer, orig string, strs []string) []float64

DistanceAll returns a slice of the distances from orig to each string in strs

Example
// Keyed fields keep the imported struct literal unambiguous and vet-clean.
std := lsdp.Weights{Insert: 1, Delete: 1, Replace: 1}
group := []string{"apple", "orange", "lemon", "water melon"}
// Prints one distance per entry of group, in the same order.
fmt.Println(lsdp.DistanceAll(std, "mon", group))
Output:

[5 5 2 8]

func Lsd

func Lsd(a, b string) int

Lsd returns standard Levenshtein distance

func Nearest

func Nearest(dm DistanceMeasurer, orig string, strs []string) (nearest string, distance float64)

Nearest returns the string in strs that is nearest to orig according to the specified distance measurer, together with its distance

Example
// Keyed fields keep the imported struct literal unambiguous and vet-clean.
std := lsdp.Weights{Insert: 1, Delete: 1, Replace: 1}
group := []string{"apple", "orange", "lemon", "water melon"}
// Prints the closest entry of group followed by its distance from "mon".
fmt.Println(lsdp.Nearest(std, "mon", group))
Output:

lemon 2

Types

type DistanceFunc added in v1.4.0

type DistanceFunc func(string, string) float64

DistanceFunc type is an adapter to allow the use of ordinary functions as DistanceMeasurer. Similar to http.HandlerFunc.

func (DistanceFunc) Distance added in v1.4.0

func (f DistanceFunc) Distance(a, b string) float64

Distance calls f(a,b)

type DistanceMeasurer

type DistanceMeasurer interface {
	Distance(string, string) float64
}

DistanceMeasurer provides measurement of the distance between 2 strings

func Normalized

func Normalized(dm DistanceMeasurer) DistanceMeasurer

Normalized returns a DistanceMeasurer that wraps dm and normalizes its result by string length

type EditCounts

type EditCounts [4]int

EditCounts represents edit counts aggregated by edit type

func CountEdit

func CountEdit(a, b string) (int, EditCounts)

CountEdit aggregates the minimum number of edits to change from a to b

func (EditCounts) Get

func (ec EditCounts) Get(t EditType) int

Get returns the number of edits of the specified type

type EditType

type EditType int

EditType represents the edit operations permitted in Levenshtein distance

const (
	INSERT EditType = iota
	DELETE
	REPLACE
	NONE
)

Permitted edit operations: insert, delete, replace, none

type LevenshteinParam

type LevenshteinParam struct {
	Insert  float64
	Delete  float64
	Replace float64
}

LevenshteinParam represents Levenshtein distance parameters that weight each type of edit

func (LevenshteinParam) Distance

func (p LevenshteinParam) Distance(a, b string) float64

Distance returns Levenshtein distance

type Weights

type Weights struct {
	Insert  float64
	Delete  float64
	Replace float64
}

Weights represents cost parameters for weighted Levenshtein distance

func (Weights) Distance

func (w Weights) Distance(a, b string) float64

Distance returns weighted Levenshtein distance

Example
wd := lsdp.Weights{Insert: 0.1, Delete: 1, Replace: 0.01}
fmt.Println(wd.Distance("kitten", "shitting"))
Output:

0.22

type WeightsByRune

type WeightsByRune struct {
	// contains filtered or unexported fields
}

WeightsByRune represents weighted levenshtein distance by rune

func ByRune

func ByRune(w *Weights) *WeightsByRune

ByRune returns weighted levenshtein distance by rune

Example
// Start from uniform base weights (keyed so the imported struct literal is
// unambiguous and vet-clean), then override the cost of specific runes.
wr := lsdp.ByRune(&lsdp.Weights{Insert: 1, Delete: 1, Replace: 1}).
	Insert("a", 0.1).
	Delete("b", 0.01).
	Replace("c", "d", 0.001)
fmt.Println(wr.Distance("bc", "ad"))
Output:

0.111

func (*WeightsByRune) Delete

func (wr *WeightsByRune) Delete(runeGroup string, delCost float64) *WeightsByRune

Delete specifies the cost of deleting the given runes

func (*WeightsByRune) Distance

func (wr *WeightsByRune) Distance(a, b string) float64

Distance returns weighted levenshtein distance by rune

func (*WeightsByRune) Insert

func (wr *WeightsByRune) Insert(runeGroup string, insCost float64) *WeightsByRune

Insert specifies the cost of inserting the given runes

func (*WeightsByRune) Replace

func (wr *WeightsByRune) Replace(runeGroupSrc, runeGroupDest string, repCost float64) *WeightsByRune

Replace specifies the cost of replacing the given source runes with the given destination runes

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL