nlpword

package
v2.1.14 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 20, 2021 License: Apache-2.0 Imports: 6 Imported by: 0

README

敏感词查找,验证,过滤和替换

Usage:

package main

import (
	"fmt"
	"github.com/abulo/ratel/v2/nlpword"
)

// main walks through the nlpword API: it builds a filter, loads sensitive
// words, and then filters, replaces, finds, and validates sample text.
func main() {
	filter := nlpword.New()

	// Both loaders return an error. Report it rather than dropping it, but keep
	// going: the expected results shown below are for the word registered via
	// AddWord.
	if err := filter.LoadWordDict("path/to/dict"); err != nil {
		fmt.Println("load word dict:", err)
	}
	if err := filter.LoadNetWordDict("https://raw.githubusercontent.com/importcjj/sensitive/master/dict/dict.txt"); err != nil {
		fmt.Println("load net word dict:", err)
	}
	filter.AddWord("长者")

	// Filter drops the sensitive word: 我为续一秒
	fmt.Println(filter.Filter("我为长者续一秒"))
	// Replace masks each rune of the sensitive word: 我为**续一秒
	fmt.Println(filter.Replace("我为长者续一秒", '*'))
	// FindIn reports whether a sensitive word is present: true, 长者
	fmt.Println(filter.FindIn("我为长者续一秒"))
	// Validate reports whether the text is clean: false, 长者
	fmt.Println(filter.Validate("我为长者续一秒"))
	// FindAll lists every matched word: [长者]
	fmt.Println(filter.FindAll("我为长者续一秒"))

	// Before the noise pattern is updated, the interleaved "x" hides the word: false
	fmt.Println(filter.FindIn("我为长x者续一秒"))
	filter.UpdateNoisePattern(`x`)
	// With "x" treated as noise the word is detected again: true, 长者
	fmt.Println(filter.FindIn("我为长x者续一秒"))
	// Validate now rejects the text as well: false, 长者
	fmt.Println(filter.Validate("我为长x者续一秒"))
}

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Filter

type Filter struct {
	// contains filtered or unexported fields
}

Filter 敏感词过滤器

func New

func New() *Filter

New 返回一个敏感词过滤器

func (*Filter) AddWord

func (filter *Filter) AddWord(words ...string)

AddWord 添加敏感词

func (*Filter) Filter

func (filter *Filter) Filter(text string) string

Filter 过滤敏感词

func (*Filter) FindAll

func (filter *Filter) FindAll(text string) []string

FindAll 找到所有匹配词

func (*Filter) FindIn

func (filter *Filter) FindIn(text string) (bool, string)

FindIn 检测敏感词

func (*Filter) Load

func (filter *Filter) Load(rd io.Reader) error

Load is the common method for adding words; it reads them from rd.

func (*Filter) LoadNetWordDict

func (filter *Filter) LoadNetWordDict(url string) error

LoadNetWordDict 加载网络敏感词字典

func (*Filter) LoadWordDict

func (filter *Filter) LoadWordDict(path string) error

LoadWordDict 加载敏感词字典

func (*Filter) RemoveNoise

func (filter *Filter) RemoveNoise(text string) string

RemoveNoise 去除空格等噪音

func (*Filter) Replace

func (filter *Filter) Replace(text string, repl rune) string

Replace 和谐敏感词

func (*Filter) UpdateNoisePattern

func (filter *Filter) UpdateNoisePattern(pattern string)

UpdateNoisePattern 更新去噪模式

func (*Filter) Validate

func (filter *Filter) Validate(text string) (bool, string)

Validate 检测字符串是否合法

type LinkList

type LinkList struct {
	// contains filtered or unexported fields
}

LinkList is a linked list that supports Push, Pop, and Empty.

func (*LinkList) Empty

func (list *LinkList) Empty() bool

Empty returns true if the list has no nodes

func (*LinkList) Pop

func (list *LinkList) Pop() interface{}

Pop returns the value of the first node

func (*LinkList) Push

func (list *LinkList) Push(v interface{})

Push appends a node

type Node

type Node struct {
	Character rune
	Children  map[rune]*Node
	Failure   *Node
	Parent    *Node
	// contains filtered or unexported fields
}

Node Trie树上的一个节点.

func NewNode

func NewNode(character rune) *Node

NewNode 新建子节点

func NewRootNode

func NewRootNode(character rune) *Node

NewRootNode 新建根节点

func (*Node) IsLeafNode

func (node *Node) IsLeafNode() bool

IsLeafNode 判断是否叶子节点

func (*Node) IsPathEnd

func (node *Node) IsPathEnd() bool

IsPathEnd 判断是否为某个路径的结束

func (*Node) IsRootNode

func (node *Node) IsRootNode() bool

IsRootNode 判断是否为根节点

type Trie

type Trie struct {
	Root *Node
}

Trie 短语组成的Trie树.

func NewTrie

func NewTrie() *Trie

NewTrie 新建一棵Trie

func (*Trie) Add

func (tree *Trie) Add(words ...string)

Add 添加若干个词

func (*Trie) BuildFailureLinks

func (tree *Trie) BuildFailureLinks()

BuildFailureLinks 更新Aho-Corasick的失败表

func (*Trie) Filter

func (tree *Trie) Filter(text string) string

Filter 直接过滤掉字符串中的敏感词

func (*Trie) FindAll

func (tree *Trie) FindAll(text string) []string

FindAll 找到所有包含在词库中的词

func (*Trie) FindIn

func (tree *Trie) FindIn(text string) (bool, string)

FindIn 判断text中是否含有词库中的词

func (*Trie) Replace

func (tree *Trie) Replace(text string, character rune) string

Replace 词语替换

func (*Trie) Validate

func (tree *Trie) Validate(text string) (bool, string)

Validate 验证字符串是否合法,如不合法则返回false和检测到的第一个敏感词

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL