|
@@ -67,10 +67,10 @@ func (ce *ColonkvEntity) divisionMoreKV(con string) string {
|
|
|
}
|
|
|
|
|
|
//获取冒号kv入口
|
|
|
-func (ce *ColonkvEntity) entrance(con, title string, contactFormat *ContactFormat, from int,isSite bool,codeSite string) ([]*Kv, map[string]string) {
|
|
|
+func (ce *ColonkvEntity) entrance(con, title string, contactFormat *ContactFormat, from int, isSite bool, codeSite string) ([]*Kv, map[string]string) {
|
|
|
kvs := ce.GetKvs(con, title, from)
|
|
|
if from == 1 {
|
|
|
- FormatContactKv(&kvs, title, nil, contactFormat,isSite,codeSite)
|
|
|
+ FormatContactKv(&kvs, title, nil, contactFormat, isSite, codeSite)
|
|
|
}
|
|
|
kv := map[string]string{}
|
|
|
for _, v := range kvs {
|
|
@@ -163,14 +163,14 @@ func (ce *ColonkvEntity) getColonKv(con, title string, from int) []*Kv {
|
|
|
}
|
|
|
|
|
|
//冒号kv和空格kv结合
|
|
|
-func (ce *ColonkvEntity) getColonSpaceKV(con string,isSite bool,codeSite string) []*Kv {
|
|
|
+func (ce *ColonkvEntity) getColonSpaceKV(con string, isSite bool, codeSite string) []*Kv {
|
|
|
con = colonkvEntity.processText(con)
|
|
|
lines := SspacekvEntity.getLines(con)
|
|
|
kvMaps := []*Kv{}
|
|
|
for _, line := range lines {
|
|
|
kvs := colonkvEntity.getColonKv(line, "", 1)
|
|
|
if len(kvs) == 0 {
|
|
|
- kv := SspacekvEntity.divideKV(line,isSite,codeSite)
|
|
|
+ kv := SspacekvEntity.divideKV(line, isSite, codeSite)
|
|
|
if kv != nil {
|
|
|
kvMaps = append(kvMaps, kv...)
|
|
|
}
|
|
@@ -276,7 +276,7 @@ func IsContactKvHandle(value string, m map[string]bool) bool {
|
|
|
|
|
|
//kv关于联系人信息的处理
|
|
|
//采购人>集中采购机构
|
|
|
-func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *ContactFormat,isSite bool,codeSite string) {
|
|
|
+func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *ContactFormat, isSite bool, codeSite string) {
|
|
|
////////////////////////////
|
|
|
//处理联系人信息
|
|
|
var indexMap map[int]string
|
|
@@ -294,14 +294,19 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
ascFind := true
|
|
|
ascFindFlag := len(indexMap) == 0 && buyers == nil
|
|
|
//采购人在联系人、电话后面的处理
|
|
|
- isCanAddToIndexMap := false
|
|
|
+ //qutil.Debug("indexMap---", indexMap)
|
|
|
+ //qutil.Debug("ascFind---", ascFind, "ascFindFlag---", ascFindFlag, "isCanAddToIndex---", isCanAddToIndexMap)
|
|
|
for _, kv := range *kvs {
|
|
|
- k := FilterContactKey(kv.Key)
|
|
|
+ isCanAddToIndexMap := false
|
|
|
+ k := FilterContactKey(kv.Key) //过滤key
|
|
|
+ //qutil.Debug(k, "---", kv.Value)
|
|
|
k_length := len([]rune(k))
|
|
|
if k_length < 2 || k_length > 15 {
|
|
|
continue
|
|
|
}
|
|
|
- isContinue := ContactInfoMustReg.MatchString(k)
|
|
|
+ isContinue := ContactInfoMustReg.MatchString(k) //精确匹配 邮编、电话、联系人等
|
|
|
+ //qutil.Debug("isContinue---", isContinue, ContactInfoVagueReg.MatchString(k), IsMapHasValue(k, ContactType), ascFindFlag)
|
|
|
+ //qutil.Debug((isContinue || (ContactInfoVagueReg.MatchString(k) && IsMapHasValue(k, ContactType))) && ascFindFlag)
|
|
|
if (isContinue || (ContactInfoVagueReg.MatchString(k) && IsMapHasValue(k, ContactType))) && ascFindFlag {
|
|
|
if len(indexMap) > 0 {
|
|
|
ascFind = true
|
|
@@ -311,30 +316,39 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
isCanAddToIndexMap = true
|
|
|
}
|
|
|
n := 1
|
|
|
+ //qutil.Debug("isCanAddToIndexMap---", isCanAddToIndexMap, "ascFind---", ascFind, "ascFindFlag---", ascFindFlag, "indexMap---", indexMap)
|
|
|
+ //qutil.Debug(" HasOrderContactType(k)---", HasOrderContactType(k))
|
|
|
for _, ct_k := range HasOrderContactType(k) {
|
|
|
+ //qutil.Debug("ct_k---", ct_k, !ContactType[ct_k].MatchString(k))
|
|
|
if !ContactType[ct_k].MatchString(k) {
|
|
|
continue
|
|
|
}
|
|
|
totalIndexMap[ct_k] = true
|
|
|
+ //qutil.Debug(isContinue, !ascFindFlag, totalIndexMap)
|
|
|
/////////////////////////////
|
|
|
if isContinue || !ascFindFlag {
|
|
|
continue
|
|
|
}
|
|
|
- // if isCanAddToIndexMap && len(indexMap) == 0 {
|
|
|
+ //qutil.Debug("isCanAddToIndexMap---", isCanAddToIndexMap)
|
|
|
if isCanAddToIndexMap {
|
|
|
indexMap[n] = ct_k
|
|
|
n++
|
|
|
ascFind = false
|
|
|
}
|
|
|
+ //qutil.Debug(n, ascFind, indexMap)
|
|
|
}
|
|
|
+ //qutil.Debug("indexMap---", indexMap)
|
|
|
}
|
|
|
+ //qutil.Debug("indexMap1---", indexMap)
|
|
|
mustMatchFirst := len(indexMap) > 0 //第一个必须匹配上
|
|
|
titleMatch := false
|
|
|
+ //qutil.Debug("title---", title, ContactTypeTitleMatch(title))
|
|
|
if titleMatchType := ContactTypeTitleMatch(title); titleMatchType != "" {
|
|
|
titleMatch = true
|
|
|
mustMatchFirst = false
|
|
|
indexMap = map[int]string{1: titleMatchType}
|
|
|
}
|
|
|
+ //qutil.Debug("titleMatch---", titleMatch, "mustMatchFirst---", mustMatchFirst, "indexMap---", indexMap)
|
|
|
// if titleMatchType := ContactTypeTitleMatch(title); len(titleMatchType) != 0 {
|
|
|
// titleMatch = true
|
|
|
// mustMatchFirst = false
|
|
@@ -351,6 +365,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
// Debug("bbbbbbbbbb", kv.Key, kv.Value)
|
|
|
// }
|
|
|
// }
|
|
|
+ //qutil.Debug("=========================================================")
|
|
|
startIndex := 0
|
|
|
prevKey := ""
|
|
|
index, tmpindex, notmatchCount, allMatchCount := 0, 0, 0, 0
|
|
@@ -360,6 +375,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
copy(kvsTemp, *kvs)
|
|
|
//again := 0
|
|
|
ishad := false
|
|
|
+ afterWinner := false
|
|
|
for kv_index, kv := range *kvs {
|
|
|
isBreak := true
|
|
|
v := strings.TrimSpace(kv.Value)
|
|
@@ -367,6 +383,12 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
isContinue := false
|
|
|
k := FilterContactKey(kv.Key)
|
|
|
k_length := len([]rune(k))
|
|
|
+ //3.4新增winnerperson和winnertel抽取
|
|
|
+ if indexMap[1] == "中标单位" && ContactInfoMustReg.MatchString(k) { //中标后是否出现电话、联系人、地址等信息
|
|
|
+ //qutil.Debug("kkkkkk:", k, indexMap)
|
|
|
+ afterWinner = true
|
|
|
+ }
|
|
|
+ //qutil.Debug(kv.Key, "++++++++++", kv.Value, buyers != nil, ascFind, isContinue)
|
|
|
if buyers != nil {
|
|
|
for _, buyer := range buyers {
|
|
|
if buyer == "" {
|
|
@@ -415,13 +437,16 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
}
|
|
|
}
|
|
|
} else if ascFind {
|
|
|
+ //qutil.Debug("HasOrderContactType(k)+++", HasOrderContactType(k))
|
|
|
for _, ct_k := range HasOrderContactType(k) {
|
|
|
ishad = false
|
|
|
+ //qutil.Debug("ct_k+++", ct_k, "ishad+++", ishad)
|
|
|
//again++
|
|
|
if k_length < 3 || k_length > 15 {
|
|
|
isBreak = false
|
|
|
continue
|
|
|
}
|
|
|
+ //qutil.Debug("+++", !ContactType[ct_k].MatchString(k))
|
|
|
if !ContactType[ct_k].MatchString(k) {
|
|
|
continue
|
|
|
}
|
|
@@ -436,7 +461,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
matchMap[ct_k] = map[string]bool{}
|
|
|
}
|
|
|
if !strings.HasSuffix(k, "方式") {
|
|
|
- kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", BuyerContacts,isSite,codeSite)
|
|
|
+ kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", BuyerContacts, isSite, codeSite)
|
|
|
if len(kvTags) == 1 {
|
|
|
tagVal, weightVal := FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(v) {
|
|
@@ -468,7 +493,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
}
|
|
|
}
|
|
|
if ct_k == "采购单位" { //打标签,权重高的重新覆盖
|
|
|
- kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", []string{"采购单位"},isSite,codeSite)
|
|
|
+ kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", []string{"采购单位"}, isSite, codeSite)
|
|
|
tagVal, weightVal := FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == ct_k {
|
|
|
if weightMap[ct_k][ct_k] == nil || (weightVal != nil && weightVal.(int) > weightMap[ct_k][ct_k].(int)) {
|
|
@@ -503,6 +528,13 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
isContinue = true
|
|
|
}
|
|
|
}
|
|
|
+ //qutil.Debug(len(indexMap), !afterWinner)
|
|
|
+ if len(indexMap) == 2 && !afterWinner { //处理同时出现winner、buyer、agency 5d6b2aa2a5cb26b9b73e79d3
|
|
|
+ //qutil.Debug("+++++++++++++++++++")
|
|
|
+ delete(indexMap, 1)
|
|
|
+ indexMap = map[int]string{1: indexMap[2]}
|
|
|
+ }
|
|
|
+ //qutil.Debug("isContinue+++", isContinue, indexMap)
|
|
|
if isContinue {
|
|
|
continue
|
|
|
}
|
|
@@ -557,6 +589,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
index++
|
|
|
}
|
|
|
}
|
|
|
+ //qutil.Debug("index+++", index, "prevKey+++", prevKey, "indexmap+++", indexMap)
|
|
|
// if startIndex == 0 || startIndex%2 == 1 || index == 0 {
|
|
|
// index = 1
|
|
|
// } else if startIndex%2 == 0 {
|
|
@@ -569,6 +602,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
continue
|
|
|
}
|
|
|
myContactType := indexMap[index]
|
|
|
+ //qutil.Debug("myContactType+++", myContactType)
|
|
|
if myContactType == "" {
|
|
|
continue
|
|
|
}
|
|
@@ -595,7 +629,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
allMatchCount++
|
|
|
delete(totalIndexMap, myContactType)
|
|
|
if !strings.HasSuffix(k, "方式") {
|
|
|
- kvTags := GetKvTags([]*Kv{&Kv{Key: myContactType + k, Value: v}}, "", BuyerContacts,isSite,codeSite)
|
|
|
+ kvTags := GetKvTags([]*Kv{&Kv{Key: myContactType + k, Value: v}}, "", BuyerContacts, isSite, codeSite)
|
|
|
if len(kvTags) == 1 {
|
|
|
tagVal, _ := FirstKeyValueInMap(kvTags)
|
|
|
if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(v) {
|
|
@@ -619,6 +653,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
|
|
|
kvTemp := *kv
|
|
|
kvTemp.Key = myContactType + k
|
|
|
kvTemp.Value = v
|
|
|
+ //qutil.Debug(kvTemp.Key, "----------------", kvTemp.Value)
|
|
|
(*kvs)[kv_index] = &kvTemp
|
|
|
if ascFind && isBreak && len(indexMap) > 0 {
|
|
|
break
|
|
@@ -707,20 +742,20 @@ func HasOrderContactType(text string) []string {
|
|
|
|
|
|
//两种冒号kv结合到一起
|
|
|
//from 1--全文 2--table td 3--table td解析采购单位联系人 4--分包
|
|
|
-func GetKVAll(content, title string, contactFormat *ContactFormat, from int,isSite bool,codeSite string) *JobKv {
|
|
|
+func GetKVAll(content, title string, contactFormat *ContactFormat, from int, isSite bool, codeSite string) *JobKv {
|
|
|
content = formatText(content, "kv")
|
|
|
- m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from,isSite,codeSite)
|
|
|
+ m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from, isSite, codeSite)
|
|
|
// for _, kvs := range m1Kvs {
|
|
|
// qutil.Debug(kvs.Key, kvs.Value)
|
|
|
// }
|
|
|
- kvTags := GetKvTags(m1Kvs, title, nil,isSite,codeSite)
|
|
|
+ kvTags := GetKvTags(m1Kvs, title, nil, isSite, codeSite)
|
|
|
// for k, kvs := range kvTags {
|
|
|
// qutil.Debug("kkkkk--", k)
|
|
|
// for _, kv := range kvs {
|
|
|
// qutil.Debug(kv.Key, kv.Value)
|
|
|
// }
|
|
|
// }
|
|
|
- m2Kvs, m2KvTags := GetKvFromtxt(content, title, from,isSite,codeSite)
|
|
|
+ m2Kvs, m2KvTags := GetKvFromtxt(content, title, from, isSite, codeSite)
|
|
|
// for k, kvs := range m2KvTags {
|
|
|
// qutil.Debug("kkkkk--", k)
|
|
|
// for _, kv := range kvs {
|
|
@@ -774,7 +809,7 @@ func PrintKvTags(kvTags map[string][]*Tag) {
|
|
|
}
|
|
|
|
|
|
//KVTags转kv
|
|
|
-func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool,codeSite string) map[string][]*Tag {
|
|
|
+func GetKvTags(findkvs []*Kv, title string, tagdbs []string, isSite bool, codeSite string) map[string][]*Tag {
|
|
|
kvTags := map[string][]*Tag{}
|
|
|
if title != "" && BlockTagMap[title] {
|
|
|
kvTags[title] = append(kvTags[title], &Tag{title, title, 0, nil, false})
|
|
@@ -792,17 +827,17 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool,codeSite
|
|
|
}
|
|
|
key = colonkvEntity.blockTitleKV(title, key)
|
|
|
//先用新的key
|
|
|
- tags := GetAppointTags(key, tagdbs,isSite,codeSite) //找标签库
|
|
|
+ tags := GetAppointTags(key, tagdbs, isSite, codeSite) //找标签库
|
|
|
if len(tags) == 0 && len(key) < 10 && len(title) > 0 && len(title) < 15 {
|
|
|
key = title + key
|
|
|
- tags = GetAppointTags(key, tagdbs,isSite,codeSite)
|
|
|
+ tags = GetAppointTags(key, tagdbs, isSite, codeSite)
|
|
|
}
|
|
|
//再用老的key
|
|
|
if len(tags) == 0 && k != key {
|
|
|
- tags = GetAppointTags(k, tagdbs,isSite,codeSite)
|
|
|
+ tags = GetAppointTags(k, tagdbs, isSite, codeSite)
|
|
|
if len(tags) == 0 && len(k) < 10 && len(title) > 0 && len(title) < 15 {
|
|
|
k = title + k
|
|
|
- tags = GetAppointTags(k, tagdbs,isSite,codeSite)
|
|
|
+ tags = GetAppointTags(k, tagdbs, isSite, codeSite)
|
|
|
if len(tags) > 0 {
|
|
|
key = k
|
|
|
}
|
|
@@ -824,7 +859,7 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool,codeSite
|
|
|
if strings.TrimSpace(nextval) == "" {
|
|
|
continue
|
|
|
}
|
|
|
- if GetAppointTags(nextval, tagdbs,isSite,codeSite).Len() > 0 || GetAppointTags(k, tagdbs,isSite,codeSite).Len() > 0 {
|
|
|
+ if GetAppointTags(nextval, tagdbs, isSite, codeSite).Len() > 0 || GetAppointTags(k, tagdbs, isSite, codeSite).Len() > 0 {
|
|
|
continue
|
|
|
}
|
|
|
}
|