dfa.go 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889
  1. package main
  2. // 多叉树
  3. type (
  4. //MultiTreeNode 多叉树节点
  5. MultiTreeNode struct {
  6. Key rune
  7. End bool
  8. Children map[rune]*MultiTreeNode
  9. }
  10. //MultiTree 多叉树
  11. MultiTree struct {
  12. Root *MultiTreeNode
  13. }
  14. //结果
  15. Result struct {
  16. Data string
  17. Start, End int
  18. }
  19. )
  20. func NewMultiTree() *MultiTree {
  21. return &MultiTree{
  22. Root: &MultiTreeNode{
  23. Children: make(map[rune]*MultiTreeNode),
  24. },
  25. }
  26. }
  27. func (mt *MultiTree) Add(segment string) {
  28. path := []rune(segment)
  29. node := mt.Root
  30. pathLen := len(path)
  31. for index, t := range path {
  32. end := index == pathLen-1
  33. if v, ok := node.Children[t]; ok {
  34. node = v
  35. if end { //后加入的词(可能是老词的子集),需要修正属性
  36. node.End = end
  37. }
  38. } else {
  39. leafNode := &MultiTreeNode{
  40. Key: t,
  41. Children: make(map[rune]*MultiTreeNode),
  42. }
  43. if end { //是结束节点
  44. leafNode.End = true
  45. }
  46. node.Children[t] = leafNode
  47. node = leafNode
  48. }
  49. }
  50. }
  51. // Match 匹配 / 查找
  52. func (mt *MultiTree) Match(segment string, maximum bool) []*Result {
  53. var wordNode *MultiTreeNode
  54. node := mt.Root
  55. var ok bool
  56. path := []rune(segment)
  57. ret := make([]*Result, 0, 0)
  58. pos := 0
  59. //找第一层
  60. for pos < len(path) {
  61. if wordNode, ok = node.Children[path[pos]]; !ok {
  62. pos += 1
  63. continue
  64. }
  65. skipStep := 1
  66. for w := pos + 1; w < len(path); w++ {
  67. if wordNode, ok = wordNode.Children[path[w]]; !ok {
  68. break
  69. }
  70. if wordNode.End && !maximum {
  71. return []*Result{
  72. &Result{
  73. Data: string(path[pos : w+1]), Start: pos, End: w + 1},
  74. }
  75. } else if wordNode.End {
  76. ret = append(ret, &Result{
  77. Data: string(path[pos : w+1]), Start: pos, End: w + 1})
  78. skipStep = w - pos
  79. }
  80. }
  81. pos += skipStep
  82. }
  83. return ret
  84. }