|
@@ -784,10 +784,12 @@ func (table *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
|
|
|
//分割表格
|
|
|
table.bSplit(n, ts)
|
|
|
//对没有表头表格的处理
|
|
|
- _, _, b := CheckMultiPackage(table.Tag, "")
|
|
|
- if b {
|
|
|
- table.StandKV["项目名称"] = table.Tag
|
|
|
- table.StandKVWeight["项目名称"] = -100
|
|
|
+ if table.Tag != "" {
|
|
|
+ _, _, b := CheckMultiPackage(table.Tag, "")
|
|
|
+ if b {
|
|
|
+ table.StandKV["项目名称"] = table.Tag
|
|
|
+ table.StandKVWeight["项目名称"] = -100
|
|
|
+ }
|
|
|
}
|
|
|
table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
|
|
|
//开始查找kv,核心模块,table.SortKV
|
|
@@ -2383,7 +2385,8 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
|
|
|
contactTypeTagMap := map[string]map[string][]interface{}{}
|
|
|
//u.Debug(mustMatchFirst, indexMap, matchMap)
|
|
|
notMatchTrCount := 0
|
|
|
- allAscFind := true
|
|
|
+ allAscFind := true //开启正序查询
|
|
|
+ //涉及变量allAscFind,indexMap
|
|
|
if len(indexMap) == 0 {
|
|
|
isCanAddToIndexMap := false
|
|
|
matchPrevFlag := false
|
|
@@ -2391,39 +2394,21 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
|
|
|
LS:
|
|
|
for _, tr := range tn.TRs {
|
|
|
for td_index, td := range tr.TDs {
|
|
|
- thisTdKvs := colonkvEntity.GetKvs(td.Text, tn.Desc, 2) //获取有序的kv
|
|
|
- if len(thisTdKvs) == 0 {
|
|
|
- tdValue := regReplAllSpace.ReplaceAllString(td.Text, "")
|
|
|
- if tdValue != "" && len([]rune(tdValue)) < 10 {
|
|
|
- thisTdKvs = append(thisTdKvs, &u.Kv{
|
|
|
- Key: tdValue,
|
|
|
- Value: "",
|
|
|
- })
|
|
|
- }
|
|
|
- }
|
|
|
+ thisTdKvs := tn.tdkv(td) //获取td冒号kv
|
|
|
if len(thisTdKvs) != 1 {
|
|
|
continue
|
|
|
}
|
|
|
- //采购人在联系人、电话后面的处理//采购单位,代理机构
|
|
|
- td_k := FilterContactKey(thisTdKvs[0].Key)
|
|
|
- td_k_length := len([]rune(td_k))
|
|
|
- if td_k_length < 2 || td_k_length > 15 {
|
|
|
- continue
|
|
|
- }
|
|
|
- isContinue := ContactInfoMustReg.MatchString(td_k)
|
|
|
- if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
|
|
|
- if !matchPrevFlag && len(indexMap) > 0 {
|
|
|
- indexMap = map[int]string{}
|
|
|
- break LS
|
|
|
- }
|
|
|
- isCanAddToIndexMap = true
|
|
|
+ //1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
|
|
|
+ goOnFunc, isContinue, td_k := tn.tdKV(thisTdKvs[0].Key, &matchPrevFlag, &isCanAddToIndexMap, &indexMap, "LS")
|
|
|
+ if !goOnFunc {
|
|
|
+ break LS
|
|
|
}
|
|
|
if isContinue {
|
|
|
continue
|
|
|
}
|
|
|
//采购单位,代理机构
|
|
|
for _, k := range HasOrderContactType(td_k) {
|
|
|
- if !ContactType[k].MatchString(td_k) {
|
|
|
+ if !ContactType[k].MatchString(td_k) { //不是采购单位,代理机构跳过
|
|
|
continue
|
|
|
}
|
|
|
if len(indexMap) == 0 {
|
|
@@ -2431,9 +2416,9 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
|
|
|
myPrevTdVal := ""
|
|
|
if td_index-2 >= 0 {
|
|
|
myPrevTdVal = tr.TDs[td_index-2].Val
|
|
|
- }
|
|
|
- if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
|
|
|
- matchPrevFlag = true
|
|
|
+ if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
|
|
|
+ matchPrevFlag = true
|
|
|
+ }
|
|
|
}
|
|
|
indexMap[0] = k
|
|
|
break
|
|
@@ -2458,195 +2443,37 @@ L:
|
|
|
jumpNextTd := false
|
|
|
for td_index, td := range tr.TDs {
|
|
|
//和|以?及|与|、多个词和在一起
|
|
|
- if !jumpNextTd && len([]rune(td.Text)) >= 5 && len([]rune(td.Text)) <= 15 && regSplit.MatchString(td.Text) && td_index+1 < len(tr.TDs) {
|
|
|
- thisTdVals := regSplit.Split(td.Text, -1)
|
|
|
- nextTdVals := MultipleValueSplitReg.Split(tr.TDs[td_index+1].Val, -1)
|
|
|
- if len(thisTdVals) == len(nextTdVals) {
|
|
|
- isHandle := false
|
|
|
- for _, k := range HasOrderContactType(td.Text) {
|
|
|
- if ContactType[k].MatchString(td.Text) {
|
|
|
- for thisTdVals_k, thisTdVals_v := range thisTdVals {
|
|
|
- thisTdVals_v = strings.TrimSpace(thisTdVals_v)
|
|
|
- if ContactType[k].MatchString(thisTdVals_v) {
|
|
|
- thisTrHasMatch = true
|
|
|
- tr.TDs[td_index+1].SortKV.AddKey(thisTdVals_v, nextTdVals[thisTdVals_k])
|
|
|
- continue
|
|
|
- }
|
|
|
- if !ContactInfoMustReg.MatchString(thisTdVals_v) {
|
|
|
- continue
|
|
|
- }
|
|
|
- jumpNextTd = true
|
|
|
- thisTrHasMatch = true
|
|
|
- tr.TDs[td_index+1].SortKV.AddKey(k+thisTdVals_v, nextTdVals[thisTdVals_k])
|
|
|
- }
|
|
|
- break
|
|
|
- }
|
|
|
- }
|
|
|
- if !isHandle && len(indexMap) > 0 {
|
|
|
- _, onlyContactType := u.FirstKeyValueInMap(indexMap)
|
|
|
- if myContactType, _ := onlyContactType.(string); myContactType != "" {
|
|
|
- for thisTdVals_k, thisTdVals_v := range thisTdVals {
|
|
|
- thisTdVals_v = strings.TrimSpace(thisTdVals_v)
|
|
|
- if ContactInfoMustReg.MatchString(thisTdVals_v) {
|
|
|
- jumpNextTd = true
|
|
|
- thisTrHasMatch = true
|
|
|
- tr.TDs[td_index+1].SortKV.AddKey(myContactType+thisTdVals_v, nextTdVals[thisTdVals_k])
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- } else {
|
|
|
- jumpNextTd = false
|
|
|
- }
|
|
|
- ///////////////////////////////////////
|
|
|
- thisTdKvs := kvAfterDivideBlock(td.Text, 3, tn.TableResult.RuleBlock) //分块之后的kv
|
|
|
+ jumpNextTd, thisTrHasMatch = tn.tdsMultipleWords(jumpNextTd, td, td_index, tr, thisTrHasMatch, indexMap)
|
|
|
+ //分块之后的kv
|
|
|
+ thisTdKvs := kvAfterDivideBlock(td.Text, 3, tn.TableResult.RuleBlock)
|
|
|
if len(thisTdKvs) == 0 {
|
|
|
- thisTdKvs = colonkvEntity.GetKvs(td.Text, tn.Desc, 2)
|
|
|
- }
|
|
|
- if len(thisTdKvs) == 0 {
|
|
|
- tdValue := regReplAllSpace.ReplaceAllString(td.Text, "")
|
|
|
- if tdValue != "" && len([]rune(tdValue)) < 15 {
|
|
|
- thisTdKvs = append(thisTdKvs, &u.Kv{
|
|
|
- Key: tdValue,
|
|
|
- Value: "",
|
|
|
- })
|
|
|
- }
|
|
|
+ thisTdKvs = tn.tdkv(td) //获取冒号kv
|
|
|
}
|
|
|
- tdAscFind := true
|
|
|
+ tdAscFind := true //开启td正序查询
|
|
|
if len(thisTdKvs) == 0 {
|
|
|
continue
|
|
|
} else if allAscFind && len(thisTdKvs) >= 3 && len(indexMap) == 0 {
|
|
|
//采购人在联系人、电话后面的处理
|
|
|
- isCanAddToIndexMap := false
|
|
|
- LL:
|
|
|
- for _, td_kv := range thisTdKvs {
|
|
|
- //u.Debug(td_kv.PrevLine)
|
|
|
- td_k := FilterContactKey(td_kv.Key)
|
|
|
- td_k_length := len([]rune(td_k))
|
|
|
- if td_k_length < 2 || td_k_length > 15 {
|
|
|
- continue
|
|
|
- }
|
|
|
- isContinue := ContactInfoMustReg.MatchString(td_k)
|
|
|
- if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
|
|
|
- if len(indexMap) > 0 {
|
|
|
- indexMap = map[int]string{}
|
|
|
- break LL
|
|
|
- }
|
|
|
- isCanAddToIndexMap = true
|
|
|
- }
|
|
|
- if isContinue {
|
|
|
- continue
|
|
|
- }
|
|
|
- if len(indexMap) == 0 {
|
|
|
- for _, k := range HasOrderContactType(td_k) {
|
|
|
- if !ContactType[k].MatchString(td_k) {
|
|
|
- continue
|
|
|
- }
|
|
|
- if isCanAddToIndexMap && len(indexMap) == 0 {
|
|
|
- indexMap[0] = k
|
|
|
- break
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if len(indexMap) > 0 {
|
|
|
- tdAscFind = false
|
|
|
- }
|
|
|
+ tdAscFind = tn.hasIndexMap(thisTdKvs, &indexMap, tdAscFind)
|
|
|
}
|
|
|
prevKey := ""
|
|
|
oldIndexMapLength := len(indexMap)
|
|
|
thidTdIndex := td_index
|
|
|
- notmatchCount := 0
|
|
|
+ //notmatchCount := 0
|
|
|
kvTitle := ""
|
|
|
for _, td_kv := range thisTdKvs {
|
|
|
//u.Debug(td_kv.Key, td_kv.Value, td_kv.Title)
|
|
|
iscontinue := false
|
|
|
td_v := td_kv.Value
|
|
|
- td_k := FilterContactKey(td_kv.Key)
|
|
|
+ td_k := FilterContactKey(td_kv.Key) //带括号()[]的采购单位,代理机构处理
|
|
|
td_k_length := len([]rune(td_k))
|
|
|
- //
|
|
|
+ if td_k_length < 3 || td_k_length > 15 {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ //都为正序查询
|
|
|
if allAscFind && tdAscFind {
|
|
|
- for _, k := range HasOrderContactType(td_k) {
|
|
|
- if td_k_length < 3 || td_k_length > 15 {
|
|
|
- continue
|
|
|
- }
|
|
|
- if !ContactType[k].MatchString(td_k) {
|
|
|
- matchCount++
|
|
|
- continue
|
|
|
- }
|
|
|
- if weightMap[k] == nil {
|
|
|
- weightMap[k] = map[string]interface{}{}
|
|
|
- }
|
|
|
- if ContactInfoVagueReg.MatchString(td_k) {
|
|
|
- if matchMap[k] == nil {
|
|
|
- matchMap[k] = map[string]bool{}
|
|
|
- }
|
|
|
- isAddToMatchMap := true
|
|
|
- if !strings.HasSuffix(td_k, "方式") {
|
|
|
- _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, 3)
|
|
|
- if len(kTag) == 1 {
|
|
|
- tagVal, weightVal := u.FirstKeyValueInMap(kTag)
|
|
|
- if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
|
|
|
- isAddToMatchMap = false
|
|
|
- }
|
|
|
- if td.SortKV.Map[tagVal] != nil {
|
|
|
- if weightMap[k][tagVal] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][tagVal].(int)) {
|
|
|
- weightMap[k][tagVal] = weightVal.(int)
|
|
|
- td.SortKV.AddKey(tagVal, td_v)
|
|
|
- thisTrHasMatch = true
|
|
|
- }
|
|
|
- } else {
|
|
|
- weightMap[k][tagVal] = weightVal.(int)
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if isAddToMatchMap && !filterValue.MatchString(td_v) && td_v != "" {
|
|
|
- matchMap[k][ContactInfoVagueReg.FindString(td_k)] = true
|
|
|
- }
|
|
|
- } else if k == "采购单位" { //打标签,权重高的重新覆盖
|
|
|
- _, kTag := KvTagsToKV([]*u.Kv{td_kv}, "", []string{"采购单位"}, 3)
|
|
|
- tagVal, weightVal := u.FirstKeyValueInMap(kTag)
|
|
|
- if tagVal == k {
|
|
|
- if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
|
|
|
- weightMap[k][k] = weightVal.(int)
|
|
|
- matchMap[k] = map[string]bool{}
|
|
|
- indexMap = map[int]string{}
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if u.IsMapHasValue(k, indexMap) {
|
|
|
- thisTrHasMatch = true
|
|
|
- iscontinue = true
|
|
|
- continue
|
|
|
- }
|
|
|
- if reCreate {
|
|
|
- indexMap = map[int]string{}
|
|
|
- reCreate = false
|
|
|
- }
|
|
|
- indexMap[thidTdIndex] = k
|
|
|
- iscontinue = true
|
|
|
- thisTrHasMatch = true
|
|
|
- thidTdIndex++
|
|
|
- break
|
|
|
- }
|
|
|
- if len(indexMap) == 0 {
|
|
|
- prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
|
|
|
- for k, v := range ContactType { //采购单位,代理机构正则
|
|
|
- if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
|
|
|
- indexMap[thidTdIndex] = k
|
|
|
- thisTrHasMatch = true
|
|
|
- thidTdIndex++
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if len(indexMap) == 0 {
|
|
|
- if titleMatchType := ContactTypeTitleMatch(td_kv.Title); titleMatchType != "" {
|
|
|
- thidTdIndex = 0
|
|
|
- matchMap = map[string]map[string]bool{}
|
|
|
- indexMap = map[int]string{1: titleMatchType}
|
|
|
- }
|
|
|
- }
|
|
|
+ //都为正序查询处理
|
|
|
+ matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex)
|
|
|
}
|
|
|
if iscontinue {
|
|
|
continue
|
|
@@ -2659,33 +2486,27 @@ L:
|
|
|
}
|
|
|
kvTitle = td_kv.Title
|
|
|
//u.Debug(indexMap, td_k, td_v, matchMap)
|
|
|
+ if td_k_length < 2 || td_k_length > 10 {
|
|
|
+ continue
|
|
|
+ }
|
|
|
if len(indexMap) > 0 {
|
|
|
- if td_k_length < 2 || td_k_length > 10 {
|
|
|
- continue
|
|
|
- }
|
|
|
- modle := 0
|
|
|
- if len(thisTdKvs) == 1 {
|
|
|
- if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
|
|
|
- modle = 1
|
|
|
- } else {
|
|
|
- modle = 2
|
|
|
- }
|
|
|
- }
|
|
|
+ //没有识别到采购单位联系人、联系电话、代理机构联系人、联系电话
|
|
|
if !ContactInfoMustReg.MatchString(td_k) {
|
|
|
- notmatchCount++
|
|
|
- if notmatchCount < len(indexMap)*2 && false {
|
|
|
- notmatchCount = 0
|
|
|
- thidTdIndex = 0
|
|
|
- indexMap = map[int]string{}
|
|
|
- matchMap = map[string]map[string]bool{}
|
|
|
- }
|
|
|
- if mustMatchFirst {
|
|
|
+ //notmatchCount++
|
|
|
+ //if notmatchCount < len(indexMap)*2 && false {//false???????
|
|
|
+ // notmatchCount = 0
|
|
|
+ // thidTdIndex = 0
|
|
|
+ // indexMap = map[int]string{}
|
|
|
+ // matchMap = map[string]map[string]bool{}
|
|
|
+ //}
|
|
|
+ if mustMatchFirst { //indexMap初始值大于0
|
|
|
break L
|
|
|
}
|
|
|
continue
|
|
|
}
|
|
|
reCreate = true
|
|
|
index := td_index
|
|
|
+ //oldIndexMapLength原来的indexMap等于0 ,现在的indexMap大于1
|
|
|
if oldIndexMapLength == 0 && len(indexMap) > 1 {
|
|
|
if prevKey != td_k {
|
|
|
prevKey = td_k
|
|
@@ -2694,11 +2515,13 @@ L:
|
|
|
index++
|
|
|
}
|
|
|
}
|
|
|
+ //kv.value为空
|
|
|
if filterValue.MatchString(td_v) {
|
|
|
thisTrHasMatch = true
|
|
|
continue
|
|
|
}
|
|
|
//u.Debug(indexMap, td_k, td_v, matchMap, index, modle)
|
|
|
+ //myContactType
|
|
|
myContactType := indexMap[index]
|
|
|
if myContactType == "" && len(indexMap) == 1 {
|
|
|
_, onlyContactType := u.FirstKeyValueInMap(indexMap)
|
|
@@ -2719,36 +2542,8 @@ L:
|
|
|
continue
|
|
|
}
|
|
|
thisTrHasMatch = true
|
|
|
- if modle == 1 {
|
|
|
- td.Text = myContactType + td_k
|
|
|
- td.Val = td.Text
|
|
|
- } else {
|
|
|
- //
|
|
|
- if !strings.HasSuffix(td_k, "方式") {
|
|
|
- _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts, 3)
|
|
|
- if len(kTag) == 1 {
|
|
|
- tagVal, _ := u.FirstKeyValueInMap(kTag)
|
|
|
- if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
|
|
|
- continue
|
|
|
- }
|
|
|
- if contactTypeTagMap[myContactType] == nil {
|
|
|
- contactTypeTagMap[myContactType] = map[string][]interface{}{}
|
|
|
- }
|
|
|
- myOldKeyArray := contactTypeTagMap[myContactType][tagVal]
|
|
|
- if myOldKeyArray != nil {
|
|
|
- tn.TRs[myOldKeyArray[0].(int)].TDs[myOldKeyArray[1].(int)].SortKV.RemoveKey(myContactType + myOldKeyArray[2].(string))
|
|
|
- } else {
|
|
|
- contactTypeTagMap[myContactType][tagVal] = make([]interface{}, 3)
|
|
|
- }
|
|
|
- if weightMap[myContactType] == nil {
|
|
|
- weightMap[myContactType] = map[string]interface{}{}
|
|
|
- }
|
|
|
- weightMap[myContactType][tagVal] = 1
|
|
|
- contactTypeTagMap[myContactType][tagVal] = []interface{}{tr_index, td_index, td_k}
|
|
|
- }
|
|
|
- }
|
|
|
- td.SortKV.AddKey(myContactType+td_k, td_v)
|
|
|
- }
|
|
|
+ //modle
|
|
|
+ modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index)
|
|
|
}
|
|
|
}
|
|
|
//u.Debug(td.SortKV.Map)
|
|
@@ -2775,6 +2570,280 @@ L:
|
|
|
// }
|
|
|
}
|
|
|
|
|
|
+//modle
|
|
|
+func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int) {
|
|
|
+ modle := 0
|
|
|
+ if len(thisTdKvs) == 1 {
|
|
|
+ if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
|
|
|
+ modle = 1
|
|
|
+ } else {
|
|
|
+ modle = 2
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if modle == 1 {
|
|
|
+ td.Text = myContactType + td_k
|
|
|
+ td.Val = td.Text
|
|
|
+ } else {
|
|
|
+ //
|
|
|
+ if !strings.HasSuffix(td_k, "方式") {
|
|
|
+ _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts, 3)
|
|
|
+ if len(kTag) == 1 {
|
|
|
+ tagVal, _ := u.FirstKeyValueInMap(kTag)
|
|
|
+ if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ if (*contactTypeTagMap)[myContactType] == nil {
|
|
|
+ (*contactTypeTagMap)[myContactType] = map[string][]interface{}{}
|
|
|
+ }
|
|
|
+ myOldKeyArray := (*contactTypeTagMap)[myContactType][tagVal]
|
|
|
+ if myOldKeyArray != nil {
|
|
|
+ tn.TRs[myOldKeyArray[0].(int)].TDs[myOldKeyArray[1].(int)].SortKV.RemoveKey(myContactType + myOldKeyArray[2].(string))
|
|
|
+ } else {
|
|
|
+ (*contactTypeTagMap)[myContactType][tagVal] = make([]interface{}, 3)
|
|
|
+ }
|
|
|
+ if (*weightMap)[myContactType] == nil {
|
|
|
+ (*weightMap)[myContactType] = map[string]interface{}{}
|
|
|
+ }
|
|
|
+ (*weightMap)[myContactType][tagVal] = 1
|
|
|
+ (*contactTypeTagMap)[myContactType][tagVal] = []interface{}{tr_index, td_index, td_k}
|
|
|
+ }
|
|
|
+ }
|
|
|
+ td.SortKV.AddKey(myContactType+td_k, td_v)
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+//都为正序查询
|
|
|
+func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
|
|
|
+ for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
|
|
|
+ if !ContactType[k].MatchString(td_k) { //没有匹配到采购单位,代理机构
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ matchCount++
|
|
|
+ if weightMap[k] == nil {
|
|
|
+ weightMap[k] = map[string]interface{}{}
|
|
|
+ }
|
|
|
+ //匹配到进行处理
|
|
|
+ if ContactInfoVagueReg.MatchString(td_k) {
|
|
|
+ thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch)
|
|
|
+ } else if k == "采购单位" { //打标签,权重高的重新覆盖
|
|
|
+ _, kTag := KvTagsToKV([]*u.Kv{td_kv}, "", []string{"采购单位"}, 3)
|
|
|
+ tagVal, weightVal := u.FirstKeyValueInMap(kTag)
|
|
|
+ if tagVal == k {
|
|
|
+ if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
|
|
|
+ weightMap[k][k] = weightVal.(int)
|
|
|
+ matchMap[k] = map[string]bool{}
|
|
|
+ indexMap = map[int]string{}
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if u.IsMapHasValue(k, indexMap) { //map中是否存在value
|
|
|
+ thisTrHasMatch = true
|
|
|
+ iscontinue = true
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if reCreate {
|
|
|
+ indexMap = map[int]string{}
|
|
|
+ reCreate = false
|
|
|
+ }
|
|
|
+ indexMap[thidTdIndex] = k
|
|
|
+ iscontinue = true
|
|
|
+ thisTrHasMatch = true
|
|
|
+ thidTdIndex++
|
|
|
+ break
|
|
|
+ }
|
|
|
+ if len(indexMap) == 0 && td_kv.PrevLine != "" {
|
|
|
+ //td_kv.PrevLine
|
|
|
+ prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
|
|
|
+ for k, v := range ContactType { //采购单位,代理机构正则
|
|
|
+ if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
|
|
|
+ indexMap[thidTdIndex] = k
|
|
|
+ thisTrHasMatch = true
|
|
|
+ thidTdIndex++
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(indexMap) == 0 && td_kv.Title != "" {
|
|
|
+ //td_kv.Title
|
|
|
+ if titleMatchType := ContactTypeTitleMatch(td_kv.Title); titleMatchType != "" {
|
|
|
+ thidTdIndex = 0
|
|
|
+ matchMap = map[string]map[string]bool{}
|
|
|
+ indexMap = map[int]string{1: titleMatchType}
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex
|
|
|
+}
|
|
|
+
|
|
|
+//匹配到进行处理
|
|
|
+func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool) bool {
|
|
|
+ if (*matchMap)[k] == nil {
|
|
|
+ (*matchMap)[k] = map[string]bool{}
|
|
|
+ }
|
|
|
+ isAddToMatchMap := true
|
|
|
+ if !strings.HasSuffix(td_k, "方式") {
|
|
|
+ _, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, 3)
|
|
|
+ if len(kTag) == 1 {
|
|
|
+ tagVal, weightVal := u.FirstKeyValueInMap(kTag)
|
|
|
+ if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
|
|
|
+ isAddToMatchMap = false
|
|
|
+ }
|
|
|
+ if td.SortKV.Map[tagVal] != nil {
|
|
|
+ if (*weightMap)[k][tagVal] == nil || (weightVal != nil && weightVal.(int) >= (*weightMap)[k][tagVal].(int)) {
|
|
|
+ (*weightMap)[k][tagVal] = weightVal.(int)
|
|
|
+ td.SortKV.AddKey(tagVal, td_v)
|
|
|
+ thisTrHasMatch = true
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ (*weightMap)[k][tagVal] = weightVal.(int)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if isAddToMatchMap && !filterValue.MatchString(td_v) && td_v != "" {
|
|
|
+ (*matchMap)[k][ContactInfoVagueReg.FindString(td_k)] = true
|
|
|
+ }
|
|
|
+ return thisTrHasMatch
|
|
|
+}
|
|
|
+
|
|
|
+//采购人在联系人、电话后面的处理
|
|
|
+func (tn *Table) hasIndexMap(thisTdKvs []*u.Kv, indexMap *map[int]string, tdAscFind bool) bool {
|
|
|
+ //采购人在联系人、电话后面的处理
|
|
|
+ isCanAddToIndexMap := false
|
|
|
+LL:
|
|
|
+ for _, td_kv := range thisTdKvs {
|
|
|
+ //1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
|
|
|
+ goOnFunc, isContinue, td_k := tn.tdKV(td_kv.Key, nil, &isCanAddToIndexMap, indexMap, "LL")
|
|
|
+ if !goOnFunc {
|
|
|
+ break LL
|
|
|
+ }
|
|
|
+ if isContinue {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if len(*indexMap) == 0 {
|
|
|
+ for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
|
|
|
+ if !ContactType[k].MatchString(td_k) {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if isCanAddToIndexMap && len(*indexMap) == 0 {
|
|
|
+ (*indexMap)[0] = k
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(*indexMap) > 0 {
|
|
|
+ tdAscFind = false
|
|
|
+ }
|
|
|
+ return tdAscFind
|
|
|
+}
|
|
|
+
|
|
|
+//和|以?及|与|、多个词和在一起
|
|
|
+func (tn *Table) tdsMultipleWords(jumpNextTd bool, td *TD, td_index int, tr *TR, thisTrHasMatch bool, indexMap map[int]string) (jump, thisTr bool) {
|
|
|
+ if !jumpNextTd && len([]rune(td.Text)) >= 5 && len([]rune(td.Text)) <= 15 && regSplit.MatchString(td.Text) && td_index+1 < len(tr.TDs) {
|
|
|
+ thisTdVals := regSplit.Split(td.Text, -1)
|
|
|
+ nextTdVals := MultipleValueSplitReg.Split(tr.TDs[td_index+1].Val, -1)
|
|
|
+ if len(thisTdVals) == len(nextTdVals) { //本次和下个长度相等
|
|
|
+ for _, k := range HasOrderContactType(td.Text) { //采购单位,代理机构
|
|
|
+ if ContactType[k].MatchString(td.Text) { //采购单位,代理机构
|
|
|
+ for thisTdVals_k, thisTdVals_v := range thisTdVals {
|
|
|
+ thisTdVals_v = strings.TrimSpace(thisTdVals_v)
|
|
|
+ if ContactType[k].MatchString(thisTdVals_v) { //采购单位,代理机构
|
|
|
+ thisTrHasMatch = true
|
|
|
+ tr.TDs[td_index+1].SortKV.AddKey(thisTdVals_v, nextTdVals[thisTdVals_k])
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if !ContactInfoMustReg.MatchString(thisTdVals_v) {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ jumpNextTd = true
|
|
|
+ thisTrHasMatch = true
|
|
|
+ tr.TDs[td_index+1].SortKV.AddKey(k+thisTdVals_v, nextTdVals[thisTdVals_k])
|
|
|
+ }
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(indexMap) > 0 {
|
|
|
+ _, onlyContactType := u.FirstKeyValueInMap(indexMap)
|
|
|
+ if myContactType, _ := onlyContactType.(string); myContactType != "" {
|
|
|
+ for thisTdVals_k, thisTdVals_v := range thisTdVals {
|
|
|
+ thisTdVals_v = strings.TrimSpace(thisTdVals_v)
|
|
|
+ if ContactInfoMustReg.MatchString(thisTdVals_v) {
|
|
|
+ jumpNextTd = true
|
|
|
+ thisTrHasMatch = true
|
|
|
+ tr.TDs[td_index+1].SortKV.AddKey(myContactType+thisTdVals_v, nextTdVals[thisTdVals_k])
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ jumpNextTd = false
|
|
|
+ }
|
|
|
+ return jumpNextTd, thisTrHasMatch
|
|
|
+}
|
|
|
+
|
|
|
+//采购单位,代理机构
|
|
|
+func (tn *Table) tdHasOrderContactType(td_k string, indexMap *map[int]string, tr *TR, prevCanAddToIndexMap, isCanAddToIndexMap, matchPrevFlag *bool, td_index int) (gotoFunc bool) {
|
|
|
+ for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
|
|
|
+ if !ContactType[k].MatchString(td_k) {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if len(*indexMap) == 0 {
|
|
|
+ if (*isCanAddToIndexMap) || (*prevCanAddToIndexMap && len(tr.TDs) == 1) {
|
|
|
+ myPrevTdVal := ""
|
|
|
+ if td_index-2 >= 0 {
|
|
|
+ myPrevTdVal = tr.TDs[td_index-2].Val
|
|
|
+ }
|
|
|
+ if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
|
|
|
+ (*matchPrevFlag) = true
|
|
|
+ }
|
|
|
+ (*indexMap)[0] = k
|
|
|
+ break
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ (*indexMap) = map[int]string{}
|
|
|
+ return false
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return true
|
|
|
+}
|
|
|
+
|
|
|
+//1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
|
|
|
+func (tn *Table) tdKV(key string, matchPrevFlag, isCanAddToIndexMap *bool, indexMap *map[int]string, gotoName string) (goOnFunc, isContinue bool, td_k string) {
|
|
|
+ td_k = FilterContactKey(key) //带括号的()[]【】采购单位,代理机构处理
|
|
|
+ td_k_length := len([]rune(td_k))
|
|
|
+ if td_k_length < 2 || td_k_length > 15 {
|
|
|
+ return true, true, "" //继续执行,跳过当前循环
|
|
|
+ }
|
|
|
+ isContinue = ContactInfoMustReg.MatchString(td_k) //识别采购单位联系人、联系电话、代理机构联系人、联系电话
|
|
|
+ if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
|
|
|
+ if gotoName == "LS" && !(*matchPrevFlag) && len(*indexMap) > 0 {
|
|
|
+ (*indexMap) = map[int]string{}
|
|
|
+ return false, false, "" //中断外层循环
|
|
|
+ }
|
|
|
+ if gotoName == "LL" && len(*indexMap) > 0 {
|
|
|
+ (*indexMap) = map[int]string{}
|
|
|
+ return false, false, ""
|
|
|
+ }
|
|
|
+ (*isCanAddToIndexMap) = true
|
|
|
+ }
|
|
|
+ return true, false, td_k //继续执行,不结束当前循环,返回处理后的值
|
|
|
+}
|
|
|
+
|
|
|
+//获取td冒号kv
|
|
|
+func (tn *Table) tdkv(td *TD) []*u.Kv {
|
|
|
+ thisTdKvs := colonkvEntity.GetKvs(td.Text, tn.Desc, 2) //获取冒号kv
|
|
|
+ //获取冒号
|
|
|
+ if len(thisTdKvs) == 0 {
|
|
|
+ tdValue := regReplAllSpace.ReplaceAllString(td.Text, "") //去除空格换行
|
|
|
+ if tdValue != "" && len([]rune(tdValue)) < 10 {
|
|
|
+ thisTdKvs = append(thisTdKvs, &u.Kv{
|
|
|
+ Key: tdValue,
|
|
|
+ Value: "",
|
|
|
+ })
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return thisTdKvs
|
|
|
+}
|
|
|
+
|
|
|
func (table *Table) analyBrand() {
|
|
|
//5c2d8c05a5cb26b9b782572b
|
|
|
//产品名称 品牌 规格 单价 单位 数量 小计 质保期
|
|
@@ -2977,6 +3046,7 @@ func (table *Table) analyBrand() {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
//初始化lineMapArr,lineMap
|
|
|
func initLineMapLineMapArr(table *Table, lineMapArr map[string]*SortMap, lineMap map[string]*SortMap) {
|
|
|
for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
|