//识别空格kv package pretreated import ( "jy/util" "regexp" "strings" ) type SpacekvEntity struct{} var ( SspacekvEntity = &SpacekvEntity{} filterLine = regexp.MustCompile("[::,,。??'\"“”‘’·~!…+=|&*#$【】]") filterSpaceKey = regexp.MustCompile("[((][^((]+[))]") excludeSpaceKey = regexp.MustCompile("[.、�\\[【{{〔<《\\]】}}〕>》]") ) func (se *SpacekvEntity) Entrance(text, title string, contactFormat *util.ContactFormat) *util.JobKv { lines := se.getLines(text) kvMaps := []*util.Kv{} for _, line := range lines { kvMap := se.divideKV(line) if kvMap == nil { continue } kvMaps = append(kvMaps, kvMap...) } FormatContactKv(&kvMaps, title, nil, contactFormat) kvTags := GetKvTags(kvMaps, title, nil) return &util.JobKv{ Kvs: kvMaps, KvTags: kvTags, } } //空格分kv func (se *SpacekvEntity) divideKV(line string) []*util.Kv { line = strings.TrimSpace(line) line = regReplAllSpace.ReplaceAllString(line, " ") line = TimeHM.ReplaceAllString(line, "D$1H$2M") if line == "" || strings.Count(line, " ") == 0 || filterLine.MatchString(line) { return nil } kv := strings.Split(line, " ") kvs := []*util.Kv{} for i := 0; i+1 <= len(kv)-1; i = i + 2 { k, v := kv[i], kv[i+1] k = filterSpaceKey.ReplaceAllString(k, "") //key字数限制 if len([]rune(k)) <= 1 || len([]rune(k)) > 15 { continue } //过滤key if excludeSpaceKey.MatchString(k) { continue } kvs = append(kvs, &util.Kv{Key: k, Value: v}) } return kvs } //分段 func (se *SpacekvEntity) getLines(text string) []string { lines := strings.FieldsFunc(text, func(r rune) bool { return r == 10 || r == 13 }) arrays := []string{} for _, line := range lines { line = regTrimSpace.ReplaceAllString(line, "") if line == "" { continue } arrays = append(arrays, line) } return arrays }