1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
// Recognizes space-separated key/value pairs.
- package pretreated
- import (
- "jy/util"
- "regexp"
- "strings"
- )
// SpacekvEntity groups the methods that extract space-separated key/value
// pairs from text. It has no fields.
type SpacekvEntity struct{}

// SspacekvEntity is the package-level instance of SpacekvEntity.
var SspacekvEntity = new(SpacekvEntity)

// filterLine matches any of the punctuation characters listed in the class;
// divideKV rejects a whole line when this pattern matches it.
var filterLine = regexp.MustCompile("[::,,。??'\"“”‘’·~!…+=|&*#$【】]")

// filterSpaceKey matches a parenthesised fragment (opening bracket, one or
// more non-opening-bracket characters, closing bracket); divideKV removes
// such fragments from a candidate key.
var filterSpaceKey = regexp.MustCompile("[((][^((]+[))]")

// excludeSpaceKey matches bracket-like and separator characters; divideKV
// drops a candidate key that contains any of them.
var excludeSpaceKey = regexp.MustCompile("[.、�\\[【{{〔<《\\]】}}〕>》]")
- func (se *SpacekvEntity) Entrance(text, title string, contactFormat *util.ContactFormat) *util.JobKv {
- lines := se.getLines(text)
- kvMaps := []*util.Kv{}
- for _, line := range lines {
- kvMap := se.divideKV(line)
- if kvMap == nil {
- continue
- }
- kvMaps = append(kvMaps, kvMap...)
- }
- FormatContactKv(&kvMaps, title, nil, contactFormat)
- kvTags := GetKvTags(kvMaps, title, nil)
- return &util.JobKv{
- Kvs: kvMaps,
- KvTags: kvTags,
- }
- }
- //空格分kv
- func (se *SpacekvEntity) divideKV(line string) []*util.Kv {
- line = strings.TrimSpace(line)
- line = regReplAllSpace.ReplaceAllString(line, " ")
- line = TimeHM.ReplaceAllString(line, "D$1H$2M")
- if line == "" || strings.Count(line, " ") == 0 || filterLine.MatchString(line) {
- return nil
- }
- kv := strings.Split(line, " ")
- kvs := []*util.Kv{}
- for i := 0; i+1 <= len(kv)-1; i = i + 2 {
- k, v := kv[i], kv[i+1]
- k = filterSpaceKey.ReplaceAllString(k, "")
- //key字数限制
- if len([]rune(k)) <= 1 || len([]rune(k)) > 15 {
- continue
- }
- //过滤key
- if excludeSpaceKey.MatchString(k) {
- continue
- }
- kvs = append(kvs, &util.Kv{Key: k, Value: v})
- }
- return kvs
- }
- //分段
- func (se *SpacekvEntity) getLines(text string) []string {
- lines := strings.FieldsFunc(text, func(r rune) bool {
- return r == 10 || r == 13
- })
- arrays := []string{}
- for _, line := range lines {
- line = regTrimSpace.ReplaceAllString(line, "")
- if line == "" {
- continue
- }
- arrays = append(arrays, line)
- }
- return arrays
- }
|