Browse Source

5d423e22a5cb26b9b7721f25
kv跨行抽取

fengweiqiang 6 years ago
parent
commit
9fdeb1c7ce
1 changed file with 13 additions and 1 deletion
  1. 13 1
      src/jy/pretreated/colonkv.go

+ 13 - 1
src/jy/pretreated/colonkv.go

@@ -9,6 +9,7 @@ import (
 	"regexp"
 	"sort"
 	"strings"
+	"unicode/utf8"
 )
 
 type ColonkvEntity struct{}
@@ -680,7 +681,7 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string) map[string][]*Tag {
 		kvTags[title] = append(kvTags[title], &Tag{title, title, 0, nil, false})
 	}
 	for _, findkv := range findkvs {
-		k, val := findkv.Key, findkv.Value
+		k, val, nextval := findkv.Key, findkv.Value, strings.TrimSpace(findkv.NextLine)
 		//val是空的话,不打标签
 		if filterValue.MatchString(val) {
 			continue
@@ -715,6 +716,17 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string) map[string][]*Tag {
 				}
 				if val != "" {
 					kvTags[tk.Value] = append(kvTags[tk.Value], &Tag{Key: k, Value: val, Weight: tk.Weight})
+				} else if nextval != "" && utf8.RuneCountInString(nextval) < 30 {
+					if strings.Contains(nextval, ":") || strings.Contains(nextval, ":") {
+						if len(strings.Split(nextval, ":")) > 1 || len(strings.Split(nextval, ":")) > 1 {
+							nextval = strings.Split(nextval, ":")[0]
+							nextval = strings.Split(nextval, ":")[0]
+							if strings.TrimSpace(nextval) == "" {
+								continue
+							}
+						}
+					}
+					kvTags[tk.Value] = append(kvTags[tk.Value], &Tag{Key: k, Value: nextval, Weight: tk.Weight})
 				}
 			}
 		} else {