gonp

package module
v0.0.0-...-cbe8565 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 24, 2024 License: MIT Imports: 5 Imported by: 0

README

workflow status

gonp

gonp is a diff algorithm implementation in Go.

Algorithm

The algorithm gonp uses is based on "An O(NP) Sequence Comparison Algorithm" by described by Sun Wu, Udi Manber and Gene Myers. An O(NP) Sequence Comparison Algorithm(following, Wu's O(NP) Algorithm) is the efficient algorithm for comparing two sequences.

Computational complexity

The computational complexity of Wu's O(NP) Algorithm is averagely O(N+PD), in the worst case, is O(NP).

Getting started

string difference

diff := gonp.New([]rune("abc"), []rune("abd"))
diff.Compose()
ed := diff.EditDistance() // ed is 2
lcs := diff.Lcs() // lcs is "ab"

ses := diff.Ses()
// ses is []SesElem{
//        {e: 'a', t: Common},
//        {e: 'b', t: Common},
//        {e: 'c', t: Delete},
//        {e: 'd', t: Add},
//        }

int slice difference

diff := gonp.New([]int{1,2,3}, []int{1,5,3})
diff.Compose()
ed := diff.EditDistance() // ed is 2
lcs := diff.Lcs() // lcs is [1,3]

ses := diff.Ses()
// ses is []SesElem{
//        {e: 1, t: Common},
//        {e: 2, t: Delete},
//        {e: 5, t: Add},
//        {e: 3, t: Common},
//        }

unified format difference

diff := gonp.New([]rune("abc"), []rune("abd"))
diff.Compose()

uniHunks := diff.UnifiedHunks()
diff.PrintUniHunks(uniHunks)
// @@ -1,3 +1,3 @@
//  a
//  b
// -c
// +d

Example

strdiff

$ make strdiff
go build -o strdiff examples/strdiff.go
$ ./strdiff abc abd
EditDistance: 2
LCS: ab
SES:
  a
  b
- c
+ d

intdiff

$ make intdiff
go build -o intdiff examples/intdiff.go
$ ./intdiff
diff [1 2 3 4 5] [1 2 9 4 5]
EditDistance: 2
LCS: [1 2 4 5]
SES:
  1
  2
- 3
+ 9
  4
  5

unistrdiff

$ make unistrdiff
go build -o unistrdiff examples/unistrdiff.go
$ ./unistrdiff abc abd
EditDistance:2
LCS:ab
Unified format difference:
@@ -1,3 +1,3 @@
 a
 b
-c
+d

uniintdiff

$ make uniintdiff
go build -o uniintdiff examples/uniintdiff.go
$ ./uniintdiff
diff [1 2 3 4 5] [1 2 9 4 5]
EditDistance: 2
LCS: [1 2 4 5]
Unified format difference:
@@ -1,5 +1,5 @@
 1
 2
-3
+9
 4
 5

unifilediff

$ make unifilediff
go build -o unifilediff examples/unifilediff.go
$ cat a.txt
a
b
c
$ cat b.txt
a
b
d
$ ./unifilediff a.txt b.txt
@@ -1,3 +1,3 @@
 a
 b
-c
+d

Documentation

Index

Examples

Constants

View Source
const (
	PhaseFrontDiff = iota
	PhaseInDiff
	PhaseBehindDiff
)
View Source
const (
	DefaultContextSize = 3
)
View Source
const (
	// limit of cordinate size
	DefaultRouteSize = 2000000
)

Variables

This section is empty.

Functions

func FprintUniHunks

func FprintUniHunks[T any](w io.Writer, uniHunks []UniHunk[T])

FprintUniHunks emit about unified format difference between a and b to w

func SprintUniHunks

func SprintUniHunks[T any](uniHunks []UniHunk[T]) string

SprintUniHunks returns string about unified format difference between a and b

Types

type Diff

type Diff[T Elem] struct {
	// contains filtered or unexported fields
}

Diff is context for calculating difference between a and b

func New

func New[T cmp.Ordered](a, b []T) *Diff[T]

func NewCmp

func NewCmp[T any](a, b []T, cmp func(T, T) int) *Diff[T]

NewCmp is initializer of Diff

func (*Diff[T]) Compose

func (diff *Diff[T]) Compose()

Compose composes diff between a and b

func (*Diff[T]) EditDistance

func (d *Diff[T]) EditDistance() int

EditDistance returns edit distance between a and b

func (*Diff[T]) FprintSes

func (diff *Diff[T]) FprintSes(w io.Writer)

FprintSes emit about shortest edit script between a and b to w

func (*Diff[T]) Lcs

func (diff *Diff[T]) Lcs() []T

Lcs returns LCS (Longest Common Subsequence) between a and b

func (*Diff[T]) OnlyEd

func (d *Diff[T]) OnlyEd() *Diff[T]

OnlyEd enables to calculate only edit distance

func (*Diff[T]) Patch

func (diff *Diff[T]) Patch(seq []T) []T

Patch applies SES between a and b to seq

Example (FilePatch)
package main

import (
	"bufio"
	"fmt"
	"log"
	"os"

	"github.com/quenbyako/gonp"
)

func main() {
	if len(os.Args) < 3 {
		log.Fatal("./filepatch filename1 filename2")
	}

	f1 := os.Args[1]
	f2 := os.Args[2]

	var (
		a   []string
		b   []string
		err error
	)

	a, err = getLines(f1)
	if err != nil {
		log.Fatalf("%s: %s", f1, err)
	}

	b, err = getLines(f2)
	if err != nil {
		log.Fatalf("%s: %s", f2, err)
	}

	diff := gonp.New(a, b)
	diff.Compose()

	patchedSeq := diff.Patch(a)
	fmt.Printf("success:%v, applying SES between '%s' and '%s'\n", equalsStringSlice(b, patchedSeq), f1, f2)

	uniPatchedSeq, err := diff.UniPatch(a, diff.UnifiedHunks())
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("success:%v, applying unified format difference between '%s' and '%s'\n", equalsStringSlice(b, uniPatchedSeq), f1, f2)
}

func equalsStringSlice(a, b []string) bool {
	m, n := len(a), len(b)
	if m != n {
		return false
	}
	for i := 0; i < m; i++ {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}

// getLines returns a file contents as string array
func getLines(f string) ([]string, error) {
	fp, err := os.Open(f)
	if err != nil {
		return []string{}, err
	}
	defer fp.Close()

	scanner := bufio.NewScanner(fp)
	lines := make([]string, 0)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	return lines, nil
}
Output:

Example (UniIntDiff)
package main

import (
	"cmp"
	"fmt"

	"github.com/quenbyako/gonp"
)

func main() {
	a := []Row{{1, "Pupa"}, {2, "Lupa"}, {3, "Popa"}}
	b := []Row{{1, "Pupa"}, {2, "Lupa"}, {3, "Zhopa"}}
	diff := gonp.NewCmp(a, b, cmpRow)
	diff.Compose()
	fmt.Printf("diff %v %v\n", a, b)
	fmt.Printf("EditDistance: %d\n", diff.EditDistance())
	fmt.Printf("LCS: %v\n", diff.Lcs())
	fmt.Println("Unified format difference:")
	diff.PrintUniHunks(diff.UnifiedHunks())
}

type Row struct {
	ID   int
	Name string
}

func (r *Row) String() string {
	return fmt.Sprintf("%v, %v", r.ID, r.Name)
}

func cmpRow(a, b Row) int {
	for _, v := range []int{
		cmp.Compare(a.ID, b.ID),
		cmp.Compare(a.Name, b.Name),
	} {
		if v != 0 {
			return v
		}
	}

	return 0
}
Output:

func (*Diff[T]) PrintSes

func (diff *Diff[T]) PrintSes()

PrintSes prints shortest edit script between a and b

Example (IntDiff)
package main

import (
	"fmt"

	"github.com/quenbyako/gonp"
)

func main() {
	a := []int{1, 2, 3, 4, 5}
	b := []int{1, 2, 9, 4, 5}
	diff := gonp.New(a, b)
	diff.Compose()
	fmt.Printf("diff %v %v\n", a, b)
	fmt.Printf("EditDistance: %d\n", diff.EditDistance())
	fmt.Printf("LCS: %v\n", diff.Lcs())
	fmt.Println("SES:")
	diff.PrintSes()
}
Output:

Example (StrDiff)
package main

import (
	"bytes"
	"fmt"
	"log"
	"os"
	"unicode/utf8"

	"github.com/quenbyako/gonp"
)

func main() {
	if len(os.Args) < 3 {
		log.Fatal("./strdiff arg1 arg2")
	}
	if !utf8.ValidString(os.Args[1]) {
		log.Fatalf("arg1 contains invalid rune")
	}

	if !utf8.ValidString(os.Args[2]) {
		log.Fatalf("arg2 contains invalid rune")
	}
	a := []rune(os.Args[1])
	b := []rune(os.Args[2])
	diff := gonp.New(a, b)
	diff.Compose()
	fmt.Printf("EditDistance: %d\n", diff.EditDistance())
	fmt.Printf("LCS: %s\n", string(diff.Lcs()))
	fmt.Println("SES:")

	var buf bytes.Buffer
	ses := diff.Ses()
	for _, e := range ses {
		ee := e.GetElem()
		switch e.GetType() {
		case gonp.SesDelete:
			fmt.Fprintf(&buf, "-%c\n", ee)
		case gonp.SesAdd:
			fmt.Fprintf(&buf, "+%c\n", ee)
		case gonp.SesCommon:
			fmt.Fprintf(&buf, " %c\n", ee)
		}
	}
	fmt.Print(buf.String())
}
Output:

func (*Diff[T]) PrintUniHunks

func (diff *Diff[T]) PrintUniHunks(uniHunks []UniHunk[T])

PrintUniHunks prints unified format difference between and b

Example (UniFileDiff)
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"log"
	"os"
	"time"

	"github.com/quenbyako/gonp"
)

func main() {
	if len(os.Args) < 3 {
		log.Fatal("./unifilediff filename1 filename2")
	}

	f1 := os.Args[1]
	f2 := os.Args[2]

	var (
		a   []string
		b   []string
		err error
	)

	a, err = getLines(f1)
	if err != nil {
		log.Fatalf("%s: %s", f1, err)
	}

	b, err = getLines(f2)
	if err != nil {
		log.Fatalf("%s: %s", f2, err)
	}

	th, err := buildTargetHeader(f1, f2)
	if err != nil {
		log.Fatal(err)
	}

	diff := gonp.New(a, b)
	diff.Compose()

	fmt.Printf(th.String())
	diff.PrintUniHunks(diff.UnifiedHunks())
}

// Target consists of a path and mtime of file.
type Target struct {
	fname string
	mtime time.Time
}

// TargetHeader has 2 targets based on pathes and mtimes based on 2 files
type TargetHeader struct {
	targets []Target
}

// getLines returns a file contents as string array
func getLines(f string) ([]string, error) {
	fp, err := os.Open(f)
	if err != nil {
		return []string{}, err
	}
	defer fp.Close()

	scanner := bufio.NewScanner(fp)
	lines := make([]string, 0)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}
	return lines, nil
}

// builderTargetHeader returns TargetHeader constructed based on 2 files given as arguments
func buildTargetHeader(f1, f2 string) (TargetHeader, error) {
	fi1, err := os.Stat(f1)
	if err != nil {
		return TargetHeader{}, err
	}
	fi2, err := os.Stat(f2)
	if err != nil {
		return TargetHeader{}, err
	}
	return TargetHeader{
		targets: []Target{
			{fname: f1, mtime: fi1.ModTime()},
			{fname: f2, mtime: fi2.ModTime()},
		},
	}, nil
}

// String returns a content of TargetHeader as a string
func (th *TargetHeader) String() string {
	if len(th.targets) != 2 {
		return ""
	}

	var b bytes.Buffer
	fmt.Fprintf(&b, "--- %s\t%s\n", th.targets[0].fname, th.targets[0].mtime.Format(time.RFC3339Nano))
	fmt.Fprintf(&b, "+++ %s\t%s\n", th.targets[1].fname, th.targets[1].mtime.Format(time.RFC3339Nano))
	return b.String()
}
Output:

func (*Diff[T]) Ses

func (diff *Diff[T]) Ses() []SesElem[T]

Ses return SES (Shortest Edit Script) between a and b

func (*Diff[T]) SetContextSize

func (d *Diff[T]) SetContextSize(n int) *Diff[T]

SetContextSize sets the context size of unified format difference

func (*Diff[T]) SetRouteSize

func (d *Diff[T]) SetRouteSize(n int) *Diff[T]

SetRouteSize sets the context size of unified format difference

func (*Diff[T]) SprintSes

func (diff *Diff[T]) SprintSes() string

SprintSes returns string about shortest edit script between a and b

func (*Diff[T]) UniPatch

func (diff *Diff[T]) UniPatch(seq []T, uniHunks []UniHunk[T]) ([]T, error)

UniPatch applies unified format difference between a and b to seq

Example (StrPatch)
package main

import (
	"fmt"
	"log"
	"os"
	"unicode/utf8"

	"github.com/quenbyako/gonp"
)

func main() {
	if len(os.Args) < 3 {
		log.Fatal("./strpatch arg1 arg2")
	}
	if !utf8.ValidString(os.Args[1]) {
		log.Fatal("arg1 contains invalid rune")
	}

	if !utf8.ValidString(os.Args[2]) {
		log.Fatal("arg2 contains invalid rune")
	}
	a := []rune(os.Args[1])
	b := []rune(os.Args[2])
	diff := gonp.New(a, b)
	diff.Compose()

	patchedSeq := diff.Patch(a)
	fmt.Printf("success:%v, applying SES between '%s' and '%s' to '%s' is '%s'\n",
		string(b) == string(patchedSeq),
		string(a), string(b),
		string(a), string(patchedSeq))

	uniPatchedSeq, err := diff.UniPatch(a, diff.UnifiedHunks())
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("success:%v, applying unified format difference between '%s' and '%s' to '%s' is '%s'\n",
		string(b) == string(uniPatchedSeq),
		string(a), string(b),
		string(a), string(uniPatchedSeq))
}
Output:

Example (UniStrDiff)
package main

import (
	"bytes"
	"fmt"
	"log"
	"os"
	"unicode/utf8"

	"github.com/quenbyako/gonp"
)

func main() {
	if len(os.Args) < 3 {
		log.Fatal("./unistrdiff arg1 arg2")
	}
	if !utf8.ValidString(os.Args[1]) {
		log.Fatalf("arg1 contains invalid rune")
	}

	if !utf8.ValidString(os.Args[2]) {
		log.Fatalf("arg2 contains invalid rune")
	}
	a := []rune(os.Args[1])
	b := []rune(os.Args[2])
	diff := gonp.New(a, b)
	diff.Compose()
	fmt.Printf("EditDistance:%d\n", diff.EditDistance())
	fmt.Printf("LCS:%s\n", string(diff.Lcs()))
	//diff.PrintUniHunks(diff.UnifiedHunks())

	fmt.Println("Unified format difference:")
	uniHunks := diff.UnifiedHunks()
	var w bytes.Buffer
	for _, uniHunk := range uniHunks {
		fmt.Fprintf(&w, uniHunk.SprintDiffRange())
		for _, e := range uniHunk.GetChanges() {
			switch e.GetType() {
			case gonp.SesDelete:
				fmt.Fprintf(&w, "-%c\n", e.GetElem())
			case gonp.SesAdd:
				fmt.Fprintf(&w, "+%c\n", e.GetElem())
			case gonp.SesCommon:
				fmt.Fprintf(&w, " %c\n", e.GetElem())
			}
		}
	}
	fmt.Print(w.String())
}
Output:

func (*Diff[T]) UnifiedHunks

func (diff *Diff[T]) UnifiedHunks() []UniHunk[T]

UnifiedHunks composes unified format difference between a and b

type Elem

type Elem = any

Type constraints for element in SES

type Point

type Point struct {
	// contains filtered or unexported fields
}

Point is coordinate in edit graph

type PointWithRoute

type PointWithRoute struct {
	// contains filtered or unexported fields
}

PointWithRoute is coordinate in edit graph attached route

type SesElem

type SesElem[T any] struct {
	// contains filtered or unexported fields
}

SesElem is element of SES

func (SesElem[T]) Cmp

func (e SesElem[T]) Cmp(b SesElem[T], c func(T, T) int) int

func (*SesElem[T]) GetElem

func (e *SesElem[T]) GetElem() T

GetElem is getter of element of SES

func (*SesElem[T]) GetType

func (e *SesElem[T]) GetType() SesType

GetType is getter of manipulation type of SES

type SesType

type SesType int

SesType is manipulaton type

const (
	// SesDelete is manipulaton type of deleting element in SES
	SesDelete SesType = iota
	// SesCommon is manipulaton type of same element in SES
	SesCommon
	// SesAdd is manipulaton type of adding element in SES
	SesAdd
)

type UniHunk

type UniHunk[T Elem] struct {
	// contains filtered or unexported fields
}

UniHunk is an element of unified format difference

func (*UniHunk[T]) GetChanges

func (uniHunk *UniHunk[T]) GetChanges() []SesElem[T]

GetChanges is getter of changes in UniHunk

func (*UniHunk[T]) SprintDiffRange

func (uniHunk *UniHunk[T]) SprintDiffRange() string

SprintDiffRange returns formatted string represents difference range

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL