Browse Source

TdContactFormat()注释

fengweiqiang 6 years ago
parent
commit
19ea3cdfa4
2 changed files with 334 additions and 264 deletions
  1. 323 253
      src/jy/pretreated/analytable.go
  2. 11 11
      src/jy/pretreated/tablev2.go

+ 323 - 253
src/jy/pretreated/analytable.go

@@ -784,10 +784,12 @@ func (table *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
 			//分割表格
 			table.bSplit(n, ts)
 			//对没有表头表格的处理
-			_, _, b := CheckMultiPackage(table.Tag, "")
-			if b {
-				table.StandKV["项目名称"] = table.Tag
-				table.StandKVWeight["项目名称"] = -100
+			if table.Tag != "" {
+				_, _, b := CheckMultiPackage(table.Tag, "")
+				if b {
+					table.StandKV["项目名称"] = table.Tag
+					table.StandKVWeight["项目名称"] = -100
+				}
 			}
 			table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
 			//开始查找kv,核心模块,table.SortKV
@@ -2383,7 +2385,8 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
 	contactTypeTagMap := map[string]map[string][]interface{}{}
 	//u.Debug(mustMatchFirst, indexMap, matchMap)
 	notMatchTrCount := 0
-	allAscFind := true
+	allAscFind := true //开启正序查询
+	//涉及变量allAscFind,indexMap
 	if len(indexMap) == 0 {
 		isCanAddToIndexMap := false
 		matchPrevFlag := false
@@ -2391,39 +2394,21 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
 	LS:
 		for _, tr := range tn.TRs {
 			for td_index, td := range tr.TDs {
-				thisTdKvs := colonkvEntity.GetKvs(td.Text, tn.Desc, 2) //获取有序的kv
-				if len(thisTdKvs) == 0 {
-					tdValue := regReplAllSpace.ReplaceAllString(td.Text, "")
-					if tdValue != "" && len([]rune(tdValue)) < 10 {
-						thisTdKvs = append(thisTdKvs, &u.Kv{
-							Key:   tdValue,
-							Value: "",
-						})
-					}
-				}
+				thisTdKvs := tn.tdkv(td) //获取td冒号kv
 				if len(thisTdKvs) != 1 {
 					continue
 				}
-				//采购人在联系人、电话后面的处理//采购单位,代理机构
-				td_k := FilterContactKey(thisTdKvs[0].Key)
-				td_k_length := len([]rune(td_k))
-				if td_k_length < 2 || td_k_length > 15 {
-					continue
-				}
-				isContinue := ContactInfoMustReg.MatchString(td_k)
-				if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
-					if !matchPrevFlag && len(indexMap) > 0 {
-						indexMap = map[int]string{}
-						break LS
-					}
-					isCanAddToIndexMap = true
+				//1.处理带括号的()[]【】采购单位,代理机构;2.识别采购单位联系人、联系电话、代理机构联系人、联系电话
+				goOnFunc, isContinue, td_k := tn.tdKV(thisTdKvs[0].Key, &matchPrevFlag, &isCanAddToIndexMap, &indexMap, "LS")
+				if !goOnFunc {
+					break LS
 				}
 				if isContinue {
 					continue
 				}
 				//采购单位,代理机构
 				for _, k := range HasOrderContactType(td_k) {
-					if !ContactType[k].MatchString(td_k) {
+					if !ContactType[k].MatchString(td_k) { //不是采购单位,代理机构跳过
 						continue
 					}
 					if len(indexMap) == 0 {
@@ -2431,9 +2416,9 @@ func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
 							myPrevTdVal := ""
 							if td_index-2 >= 0 {
 								myPrevTdVal = tr.TDs[td_index-2].Val
-							}
-							if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
-								matchPrevFlag = true
+								if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
+									matchPrevFlag = true
+								}
 							}
 							indexMap[0] = k
 							break
@@ -2458,195 +2443,37 @@ L:
 		jumpNextTd := false
 		for td_index, td := range tr.TDs {
 			//和|以?及|与|、多个词和在一起
-			if !jumpNextTd && len([]rune(td.Text)) >= 5 && len([]rune(td.Text)) <= 15 && regSplit.MatchString(td.Text) && td_index+1 < len(tr.TDs) {
-				thisTdVals := regSplit.Split(td.Text, -1)
-				nextTdVals := MultipleValueSplitReg.Split(tr.TDs[td_index+1].Val, -1)
-				if len(thisTdVals) == len(nextTdVals) {
-					isHandle := false
-					for _, k := range HasOrderContactType(td.Text) {
-						if ContactType[k].MatchString(td.Text) {
-							for thisTdVals_k, thisTdVals_v := range thisTdVals {
-								thisTdVals_v = strings.TrimSpace(thisTdVals_v)
-								if ContactType[k].MatchString(thisTdVals_v) {
-									thisTrHasMatch = true
-									tr.TDs[td_index+1].SortKV.AddKey(thisTdVals_v, nextTdVals[thisTdVals_k])
-									continue
-								}
-								if !ContactInfoMustReg.MatchString(thisTdVals_v) {
-									continue
-								}
-								jumpNextTd = true
-								thisTrHasMatch = true
-								tr.TDs[td_index+1].SortKV.AddKey(k+thisTdVals_v, nextTdVals[thisTdVals_k])
-							}
-							break
-						}
-					}
-					if !isHandle && len(indexMap) > 0 {
-						_, onlyContactType := u.FirstKeyValueInMap(indexMap)
-						if myContactType, _ := onlyContactType.(string); myContactType != "" {
-							for thisTdVals_k, thisTdVals_v := range thisTdVals {
-								thisTdVals_v = strings.TrimSpace(thisTdVals_v)
-								if ContactInfoMustReg.MatchString(thisTdVals_v) {
-									jumpNextTd = true
-									thisTrHasMatch = true
-									tr.TDs[td_index+1].SortKV.AddKey(myContactType+thisTdVals_v, nextTdVals[thisTdVals_k])
-								}
-							}
-						}
-					}
-				}
-			} else {
-				jumpNextTd = false
-			}
-			///////////////////////////////////////
-			thisTdKvs := kvAfterDivideBlock(td.Text, 3, tn.TableResult.RuleBlock) //分块之后的kv
+			jumpNextTd, thisTrHasMatch = tn.tdsMultipleWords(jumpNextTd, td, td_index, tr, thisTrHasMatch, indexMap)
+			//分块之后的kv
+			thisTdKvs := kvAfterDivideBlock(td.Text, 3, tn.TableResult.RuleBlock)
 			if len(thisTdKvs) == 0 {
-				thisTdKvs = colonkvEntity.GetKvs(td.Text, tn.Desc, 2)
-			}
-			if len(thisTdKvs) == 0 {
-				tdValue := regReplAllSpace.ReplaceAllString(td.Text, "")
-				if tdValue != "" && len([]rune(tdValue)) < 15 {
-					thisTdKvs = append(thisTdKvs, &u.Kv{
-						Key:   tdValue,
-						Value: "",
-					})
-				}
+				thisTdKvs = tn.tdkv(td) //获取冒号kv
 			}
-			tdAscFind := true
+			tdAscFind := true //开启td正序查询
 			if len(thisTdKvs) == 0 {
 				continue
 			} else if allAscFind && len(thisTdKvs) >= 3 && len(indexMap) == 0 {
 				//采购人在联系人、电话后面的处理
-				isCanAddToIndexMap := false
-			LL:
-				for _, td_kv := range thisTdKvs {
-					//u.Debug(td_kv.PrevLine)
-					td_k := FilterContactKey(td_kv.Key)
-					td_k_length := len([]rune(td_k))
-					if td_k_length < 2 || td_k_length > 15 {
-						continue
-					}
-					isContinue := ContactInfoMustReg.MatchString(td_k)
-					if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
-						if len(indexMap) > 0 {
-							indexMap = map[int]string{}
-							break LL
-						}
-						isCanAddToIndexMap = true
-					}
-					if isContinue {
-						continue
-					}
-					if len(indexMap) == 0 {
-						for _, k := range HasOrderContactType(td_k) {
-							if !ContactType[k].MatchString(td_k) {
-								continue
-							}
-							if isCanAddToIndexMap && len(indexMap) == 0 {
-								indexMap[0] = k
-								break
-							}
-						}
-					}
-				}
-				if len(indexMap) > 0 {
-					tdAscFind = false
-				}
+				tdAscFind = tn.hasIndexMap(thisTdKvs, &indexMap, tdAscFind)
 			}
 			prevKey := ""
 			oldIndexMapLength := len(indexMap)
 			thidTdIndex := td_index
-			notmatchCount := 0
+			//notmatchCount := 0
 			kvTitle := ""
 			for _, td_kv := range thisTdKvs {
 				//u.Debug(td_kv.Key, td_kv.Value, td_kv.Title)
 				iscontinue := false
 				td_v := td_kv.Value
-				td_k := FilterContactKey(td_kv.Key)
+				td_k := FilterContactKey(td_kv.Key) //带括号()[]的采购单位,代理机构处理
 				td_k_length := len([]rune(td_k))
-				//
+				if td_k_length < 3 || td_k_length > 15 {
+					continue
+				}
+				//都为正序查询
 				if allAscFind && tdAscFind {
-					for _, k := range HasOrderContactType(td_k) {
-						if td_k_length < 3 || td_k_length > 15 {
-							continue
-						}
-						if !ContactType[k].MatchString(td_k) {
-							matchCount++
-							continue
-						}
-						if weightMap[k] == nil {
-							weightMap[k] = map[string]interface{}{}
-						}
-						if ContactInfoVagueReg.MatchString(td_k) {
-							if matchMap[k] == nil {
-								matchMap[k] = map[string]bool{}
-							}
-							isAddToMatchMap := true
-							if !strings.HasSuffix(td_k, "方式") {
-								_, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, 3)
-								if len(kTag) == 1 {
-									tagVal, weightVal := u.FirstKeyValueInMap(kTag)
-									if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
-										isAddToMatchMap = false
-									}
-									if td.SortKV.Map[tagVal] != nil {
-										if weightMap[k][tagVal] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][tagVal].(int)) {
-											weightMap[k][tagVal] = weightVal.(int)
-											td.SortKV.AddKey(tagVal, td_v)
-											thisTrHasMatch = true
-										}
-									} else {
-										weightMap[k][tagVal] = weightVal.(int)
-									}
-								}
-							}
-							if isAddToMatchMap && !filterValue.MatchString(td_v) && td_v != "" {
-								matchMap[k][ContactInfoVagueReg.FindString(td_k)] = true
-							}
-						} else if k == "采购单位" { //打标签,权重高的重新覆盖
-							_, kTag := KvTagsToKV([]*u.Kv{td_kv}, "", []string{"采购单位"}, 3)
-							tagVal, weightVal := u.FirstKeyValueInMap(kTag)
-							if tagVal == k {
-								if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
-									weightMap[k][k] = weightVal.(int)
-									matchMap[k] = map[string]bool{}
-									indexMap = map[int]string{}
-								}
-							}
-						}
-						if u.IsMapHasValue(k, indexMap) {
-							thisTrHasMatch = true
-							iscontinue = true
-							continue
-						}
-						if reCreate {
-							indexMap = map[int]string{}
-							reCreate = false
-						}
-						indexMap[thidTdIndex] = k
-						iscontinue = true
-						thisTrHasMatch = true
-						thidTdIndex++
-						break
-					}
-					if len(indexMap) == 0 {
-						prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
-						for k, v := range ContactType { //采购单位,代理机构正则
-							if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
-								indexMap[thidTdIndex] = k
-								thisTrHasMatch = true
-								thidTdIndex++
-							}
-						}
-					}
-					if len(indexMap) == 0 {
-						if titleMatchType := ContactTypeTitleMatch(td_kv.Title); titleMatchType != "" {
-							thidTdIndex = 0
-							matchMap = map[string]map[string]bool{}
-							indexMap = map[int]string{1: titleMatchType}
-						}
-					}
+					//都为正序查询处理
+					matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex)
 				}
 				if iscontinue {
 					continue
@@ -2659,33 +2486,27 @@ L:
 				}
 				kvTitle = td_kv.Title
 				//u.Debug(indexMap, td_k, td_v, matchMap)
+				if td_k_length < 2 || td_k_length > 10 {
+					continue
+				}
 				if len(indexMap) > 0 {
-					if td_k_length < 2 || td_k_length > 10 {
-						continue
-					}
-					modle := 0
-					if len(thisTdKvs) == 1 {
-						if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
-							modle = 1
-						} else {
-							modle = 2
-						}
-					}
+					//没有识别到采购单位联系人、联系电话、代理机构联系人、联系电话
 					if !ContactInfoMustReg.MatchString(td_k) {
-						notmatchCount++
-						if notmatchCount < len(indexMap)*2 && false {
-							notmatchCount = 0
-							thidTdIndex = 0
-							indexMap = map[int]string{}
-							matchMap = map[string]map[string]bool{}
-						}
-						if mustMatchFirst {
+						//notmatchCount++
+						//if notmatchCount < len(indexMap)*2 && false {//false???????
+						//	notmatchCount = 0
+						//	thidTdIndex = 0
+						//	indexMap = map[int]string{}
+						//	matchMap = map[string]map[string]bool{}
+						//}
+						if mustMatchFirst { //indexMap初始值大于0
 							break L
 						}
 						continue
 					}
 					reCreate = true
 					index := td_index
+					//oldIndexMapLength原来的indexMap等于0 ,现在的indexMap大于1
 					if oldIndexMapLength == 0 && len(indexMap) > 1 {
 						if prevKey != td_k {
 							prevKey = td_k
@@ -2694,11 +2515,13 @@ L:
 							index++
 						}
 					}
+					//kv.value为空
 					if filterValue.MatchString(td_v) {
 						thisTrHasMatch = true
 						continue
 					}
 					//u.Debug(indexMap, td_k, td_v, matchMap, index, modle)
+					//myContactType
 					myContactType := indexMap[index]
 					if myContactType == "" && len(indexMap) == 1 {
 						_, onlyContactType := u.FirstKeyValueInMap(indexMap)
@@ -2719,36 +2542,8 @@ L:
 						continue
 					}
 					thisTrHasMatch = true
-					if modle == 1 {
-						td.Text = myContactType + td_k
-						td.Val = td.Text
-					} else {
-						//
-						if !strings.HasSuffix(td_k, "方式") {
-							_, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts, 3)
-							if len(kTag) == 1 {
-								tagVal, _ := u.FirstKeyValueInMap(kTag)
-								if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
-									continue
-								}
-								if contactTypeTagMap[myContactType] == nil {
-									contactTypeTagMap[myContactType] = map[string][]interface{}{}
-								}
-								myOldKeyArray := contactTypeTagMap[myContactType][tagVal]
-								if myOldKeyArray != nil {
-									tn.TRs[myOldKeyArray[0].(int)].TDs[myOldKeyArray[1].(int)].SortKV.RemoveKey(myContactType + myOldKeyArray[2].(string))
-								} else {
-									contactTypeTagMap[myContactType][tagVal] = make([]interface{}, 3)
-								}
-								if weightMap[myContactType] == nil {
-									weightMap[myContactType] = map[string]interface{}{}
-								}
-								weightMap[myContactType][tagVal] = 1
-								contactTypeTagMap[myContactType][tagVal] = []interface{}{tr_index, td_index, td_k}
-							}
-						}
-						td.SortKV.AddKey(myContactType+td_k, td_v)
-					}
+					//modle
+					modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index)
 				}
 			}
 			//u.Debug(td.SortKV.Map)
@@ -2775,6 +2570,280 @@ L:
 	//	}
 }
 
+// modle stores one contact kv on td under the key myContactType+td_k.
+//
+// When thisTdKvs holds exactly one kv whose value is blank once
+// regReplAllSpace has stripped it, the cell is treated as a bare label:
+// td.Text and td.Val are overwritten with the prefixed key and nothing else
+// happens. Otherwise, unless td_k ends in "方式", the prefixed key is tagged
+// through KvTagsToKV; when exactly one tag comes back, the key previously
+// recorded for the same (contact type, tag) pair is removed from its cell, the
+// tag weight is set to 1 in weightMap, and the current position is remembered
+// in contactTypeTagMap as {tr_index, td_index, td_k}. A value that matches
+// ContactBuyerPersonFilterReg under the tag "采购单位联系人" is dropped
+// without being stored. In every remaining case the kv is added to td.SortKV.
+func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int) {
+	prefixedKey := myContactType + td_k
+	// A single kv with an empty value is only a label: rewrite the cell text.
+	if len(thisTdKvs) == 1 && regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
+		td.Text = prefixedKey
+		td.Val = td.Text
+		return
+	}
+	if !strings.HasSuffix(td_k, "方式") {
+		_, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: prefixedKey, Value: td_v}}, "", BuyerContacts, 3)
+		if len(kTag) == 1 {
+			tagVal, _ := u.FirstKeyValueInMap(kTag)
+			if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
+				return
+			}
+			// Maps are reference types, so these aliases write through to the caller's maps.
+			tagsByType := *contactTypeTagMap
+			weights := *weightMap
+			if tagsByType[myContactType] == nil {
+				tagsByType[myContactType] = map[string][]interface{}{}
+			}
+			// Remove the key an earlier cell registered for this tag before taking its place.
+			if prev := tagsByType[myContactType][tagVal]; prev != nil {
+				tn.TRs[prev[0].(int)].TDs[prev[1].(int)].SortKV.RemoveKey(myContactType + prev[2].(string))
+			}
+			if weights[myContactType] == nil {
+				weights[myContactType] = map[string]interface{}{}
+			}
+			weights[myContactType][tagVal] = 1
+			tagsByType[myContactType][tagVal] = []interface{}{tr_index, td_index, td_k}
+		}
+	}
+	td.SortKV.AddKey(prefixedKey, td_v)
+}
+
+// asdFind handles one kv in ascending ("forward") search mode: it tries to classify td_k as a contact type, falls back to td_kv.PrevLine and then td_kv.Title while indexMap is still empty, and returns the updated matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate and thidTdIndex.
+func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
+	for _, k := range HasOrderContactType(td_k) { // candidate contact types (buyer / agency, per the original comment)
+		if !ContactType[k].MatchString(td_k) { // key does not match this contact type
+			continue
+		}
+		matchCount++ // counts matches; NOTE(review): the replaced inline code incremented this on a non-match instead — confirm which is intended
+		if weightMap[k] == nil {
+			weightMap[k] = map[string]interface{}{}
+		}
+		// A contact type matched: process the key.
+		if ContactInfoVagueReg.MatchString(td_k) {
+			thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch)
+		} else if k == "采购单位" { // tag the kv; an equal or higher weight overrides the stored one
+			_, kTag := KvTagsToKV([]*u.Kv{td_kv}, "", []string{"采购单位"}, 3)
+			tagVal, weightVal := u.FirstKeyValueInMap(kTag)
+			if tagVal == k {
+				if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
+					weightMap[k][k] = weightVal.(int)
+					matchMap[k] = map[string]bool{}
+					indexMap = map[int]string{} // discard previously recorded positions
+				}
+			}
+		}
+		if u.IsMapHasValue(k, indexMap) { // this contact type is already a value in indexMap
+			thisTrHasMatch = true
+			iscontinue = true
+			continue
+		}
+		if reCreate { // start from a fresh indexMap before recording the new type
+			indexMap = map[int]string{}
+			reCreate = false
+		}
+		indexMap[thidTdIndex] = k
+		iscontinue = true
+		thisTrHasMatch = true
+		thidTdIndex++
+		break
+	}
+	if len(indexMap) == 0 && td_kv.PrevLine != "" {
+		// Fallback 1: look for a contact type in the kv's previous line.
+		prevLine := FilterSerial.ReplaceAllString(td_kv.PrevLine, "")
+		for k, v := range ContactType { // contact-type regexes (buyer / agency, per the original comment)
+			if u.IsArrayHasValue(prevLine, v.FindAllString(prevLine, -1)) {
+				indexMap[thidTdIndex] = k
+				thisTrHasMatch = true
+				thidTdIndex++
+			}
+		}
+	}
+	if len(indexMap) == 0 && td_kv.Title != "" {
+		// Fallback 2: derive the contact type from the kv's title.
+		if titleMatchType := ContactTypeTitleMatch(td_kv.Title); titleMatchType != "" {
+			thidTdIndex = 0
+			matchMap = map[string]map[string]bool{}
+			indexMap = map[int]string{1: titleMatchType} // single entry, stored under key 1
+		}
+	}
+	return matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex
+}
+
+// matchContactType processes a key td_k already matched to contact type k: it tags the kv via KvTagsToKV, updates weightMap[k] and possibly td.SortKV, records the matched ContactInfoVagueReg fragment in matchMap[k], and returns thisTrHasMatch (set to true when a value was written to td.SortKV).
+func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool) bool {
+	if (*matchMap)[k] == nil {
+		(*matchMap)[k] = map[string]bool{}
+	}
+	isAddToMatchMap := true
+	if !strings.HasSuffix(td_k, "方式") { // keys ending in "方式" are not tagged
+		_, kTag := KvTagsToKV([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts, 3)
+		if len(kTag) == 1 {
+			tagVal, weightVal := u.FirstKeyValueInMap(kTag)
+			if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
+				isAddToMatchMap = false // value rejected by the buyer-contact-person filter
+			}
+			if td.SortKV.Map[tagVal] != nil { // the cell already holds this tag: overwrite only on equal or higher weight
+				if (*weightMap)[k][tagVal] == nil || (weightVal != nil && weightVal.(int) >= (*weightMap)[k][tagVal].(int)) {
+					(*weightMap)[k][tagVal] = weightVal.(int)
+					td.SortKV.AddKey(tagVal, td_v)
+					thisTrHasMatch = true
+				}
+			} else {
+				(*weightMap)[k][tagVal] = weightVal.(int) // only the weight is recorded; no value is added here
+			}
+		}
+	}
+	if isAddToMatchMap && !filterValue.MatchString(td_v) && td_v != "" {
+		(*matchMap)[k][ContactInfoVagueReg.FindString(td_k)] = true
+	}
+	return thisTrHasMatch
+}
+
+// hasIndexMap covers the layout in which the contact fields come before the
+// organisation they belong to (original note: the buyer appears after the
+// contact person / phone).
+//
+// Each key in thisTdKvs is passed through tn.tdKV in "LL" mode. While indexMap
+// is empty and tdKV has flagged that a contact-info key was seen, the first
+// contact type whose regexp matches a later key is stored as (*indexMap)[0].
+// Scanning stops as soon as tdKV asks to abort. The return value is false when
+// indexMap is non-empty afterwards, and the incoming tdAscFind otherwise.
+func (tn *Table) hasIndexMap(thisTdKvs []*u.Kv, indexMap *map[int]string, tdAscFind bool) bool {
+	sawContactInfo := false
+	for _, kv := range thisTdKvs {
+		proceed, skip, key := tn.tdKV(kv.Key, nil, &sawContactInfo, indexMap, "LL")
+		if !proceed {
+			break
+		}
+		if skip || len(*indexMap) != 0 {
+			continue
+		}
+		for _, contactType := range HasOrderContactType(key) {
+			if !ContactType[contactType].MatchString(key) {
+				continue
+			}
+			// indexMap is still empty here, so only the flag decides.
+			if sawContactInfo {
+				(*indexMap)[0] = contactType
+				break
+			}
+		}
+	}
+	if len(*indexMap) > 0 {
+		return false
+	}
+	return tdAscFind
+}
+
+// tdsMultipleWords handles a cell whose text joins several labels (original note: separators 和|以?及|与|、): it splits td.Text with regSplit and the next cell's Val with MultipleValueSplitReg, pairs them by position when the counts are equal, adds them to the next cell's SortKV, and returns the updated jumpNextTd and thisTrHasMatch.
+func (tn *Table) tdsMultipleWords(jumpNextTd bool, td *TD, td_index int, tr *TR, thisTrHasMatch bool, indexMap map[int]string) (jump, thisTr bool) {
+	if !jumpNextTd && len([]rune(td.Text)) >= 5 && len([]rune(td.Text)) <= 15 && regSplit.MatchString(td.Text) && td_index+1 < len(tr.TDs) {
+		thisTdVals := regSplit.Split(td.Text, -1)
+		nextTdVals := MultipleValueSplitReg.Split(tr.TDs[td_index+1].Val, -1)
+		if len(thisTdVals) == len(nextTdVals) { // same number of labels and values, so they can be paired by index
+			for _, k := range HasOrderContactType(td.Text) { // candidate contact types (buyer / agency, per the original comment)
+				if ContactType[k].MatchString(td.Text) { // the whole cell text matches this contact type
+					for thisTdVals_k, thisTdVals_v := range thisTdVals {
+						thisTdVals_v = strings.TrimSpace(thisTdVals_v)
+						if ContactType[k].MatchString(thisTdVals_v) { // this label is itself the contact type: store it unprefixed
+							thisTrHasMatch = true
+							tr.TDs[td_index+1].SortKV.AddKey(thisTdVals_v, nextTdVals[thisTdVals_k])
+							continue
+						}
+						if !ContactInfoMustReg.MatchString(thisTdVals_v) {
+							continue
+						}
+						jumpNextTd = true
+						thisTrHasMatch = true
+						tr.TDs[td_index+1].SortKV.AddKey(k+thisTdVals_v, nextTdVals[thisTdVals_k]) // contact-info label, prefixed with the contact type
+					}
+					break
+				}
+			}
+			if len(indexMap) > 0 { // a contact type is already known: prefix contact-info labels with it
+				_, onlyContactType := u.FirstKeyValueInMap(indexMap)
+				if myContactType, _ := onlyContactType.(string); myContactType != "" {
+					for thisTdVals_k, thisTdVals_v := range thisTdVals {
+						thisTdVals_v = strings.TrimSpace(thisTdVals_v)
+						if ContactInfoMustReg.MatchString(thisTdVals_v) {
+							jumpNextTd = true
+							thisTrHasMatch = true
+							tr.TDs[td_index+1].SortKV.AddKey(myContactType+thisTdVals_v, nextTdVals[thisTdVals_k])
+						}
+					}
+				}
+			}
+		}
+	} else {
+		jumpNextTd = false // precondition not met (or the previous cell set the flag): clear it
+	}
+	return jumpNextTd, thisTrHasMatch
+}
+
+// tdHasOrderContactType records the first contact type (buyer / agency, per the original comment) matching td_k as (*indexMap)[0] when indexMap is empty and adding is allowed; if indexMap is already non-empty on a match it is reset and false is returned, otherwise true. NOTE(review): no caller is visible in this chunk.
+func (tn *Table) tdHasOrderContactType(td_k string, indexMap *map[int]string, tr *TR, prevCanAddToIndexMap, isCanAddToIndexMap, matchPrevFlag *bool, td_index int) (gotoFunc bool) {
+	for _, k := range HasOrderContactType(td_k) { // candidate contact types for this key
+		if !ContactType[k].MatchString(td_k) {
+			continue
+		}
+		if len(*indexMap) == 0 {
+			if (*isCanAddToIndexMap) || (*prevCanAddToIndexMap && len(tr.TDs) == 1) {
+				myPrevTdVal := ""
+				if td_index-2 >= 0 { // look two cells to the left, when that cell exists
+					myPrevTdVal = tr.TDs[td_index-2].Val
+				}
+				if myPrevTdVal != "" && len([]rune(myPrevTdVal)) < 10 && ContactInfoMustReg.MatchString(myPrevTdVal) {
+					(*matchPrevFlag) = true // that earlier cell is a short contact-info label
+				}
+				(*indexMap)[0] = k
+				break
+			}
+		} else {
+			(*indexMap) = map[int]string{} // a type was already recorded: reset and tell the caller to stop
+			return false
+		}
+	}
+	return true
+}
+
+// tdKV normalizes one kv key and classifies it for the contact-type scan.
+//
+// The key is first cleaned with FilterContactKey (original note: strips the
+// bracketed ()[]【】 forms around buyer / agency names). It is then checked
+// against ContactInfoMustReg, and against ContactInfoVagueReg combined with a
+// ContactType lookup, to decide whether it is a contact-info key (contact
+// person, phone, ...).
+//
+// Parameters:
+//   - key: raw kv key.
+//   - matchPrevFlag: only read when gotoName is "LS"; nil is treated as false.
+//   - isCanAddToIndexMap: set to true once a contact-info key has been seen.
+//   - indexMap: reset to an empty map when the scan has to be aborted.
+//   - gotoName: "LS" or "LL", naming the caller's loop and selecting its abort rule.
+//
+// Returns:
+//   - goOnFunc: false when the caller must break out of its labelled loop.
+//   - isContinue: true when the caller must skip to its next iteration. This is
+//     the case for keys outside the 2..15 rune range and for keys matching
+//     ContactInfoMustReg, which is what the inline code this helper replaced
+//     did at both call sites.
+//   - td_k: the cleaned key ("" when the key was rejected or the scan aborted).
+func (tn *Table) tdKV(key string, matchPrevFlag, isCanAddToIndexMap *bool, indexMap *map[int]string, gotoName string) (goOnFunc, isContinue bool, td_k string) {
+	td_k = FilterContactKey(key)
+	if n := len([]rune(td_k)); n < 2 || n > 15 {
+		return true, true, "" // implausible key length: skip this kv
+	}
+	isContinue = ContactInfoMustReg.MatchString(td_k)
+	if isContinue || (ContactInfoVagueReg.MatchString(td_k) && u.IsMapHasValue(td_k, ContactType)) {
+		if len(*indexMap) > 0 {
+			// A contact type was recorded before this contact-info key was
+			// seen, so the reversed layout does not apply.
+			abort := false
+			switch gotoName {
+			case "LS":
+				abort = matchPrevFlag == nil || !*matchPrevFlag
+			case "LL":
+				abort = true
+			}
+			if abort {
+				*indexMap = map[int]string{}
+				return false, false, ""
+			}
+		}
+		*isCanAddToIndexMap = true
+	}
+	// Propagate isContinue: a ContactInfoMustReg key must not fall through to
+	// the caller's contact-type matching.
+	return true, isContinue, td_k
+}
+
+// tdkv returns the colon-separated kvs found in td.Text by
+// colonkvEntity.GetKvs. When none are found and the text, with everything
+// regReplAllSpace matches removed, is non-empty and shorter than 10 runes, the
+// whole text is returned as a single key with an empty value.
+func (tn *Table) tdkv(td *TD) []*u.Kv {
+	kvs := colonkvEntity.GetKvs(td.Text, tn.Desc, 2)
+	if len(kvs) > 0 {
+		return kvs
+	}
+	label := regReplAllSpace.ReplaceAllString(td.Text, "")
+	if label == "" || len([]rune(label)) >= 10 {
+		return kvs
+	}
+	// A short cell without a colon is taken to be a bare key.
+	return append(kvs, &u.Kv{
+		Key:   label,
+		Value: "",
+	})
+}
+
 func (table *Table) analyBrand() {
 	//5c2d8c05a5cb26b9b782572b
 	//产品名称 品牌 规格 单价 单位 数量  小计 质保期
@@ -2977,6 +3046,7 @@ func (table *Table) analyBrand() {
 		}
 	}
 }
+
 //初始化lineMapArr,lineMap
 func initLineMapLineMapArr(table *Table, lineMapArr map[string]*SortMap, lineMap map[string]*SortMap) {
 	for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序

+ 11 - 11
src/jy/pretreated/tablev2.go

@@ -128,8 +128,8 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 	text := dwReg.ReplaceAllString(u.TrimLRAll(txt, ""), "$1")
 	td.Val = text //值
 	td.Text = txt //原始串
-	//对td单元格值判断是否是key和根据td内容长度进行分块处理
-	td.tdIsKey(tr, table, bsontable)
+	//对td单元格值判断是否是表头和根据td内容长度进行分块处理
+	td.tdIsHb(tr, table, bsontable)
 	bhead := false
 	if td.TR.RowPos == 0 { //第一行
 		if td.Goquery.Closest("thead").Size() == 1 && !bsontable { //如果是thead确定为k值表头
@@ -228,13 +228,13 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR, table *Table) {
 	}
 }
 
-//对td单元格值判断是否是key和根据td内容长度进行分块处理
-func (td *TD) tdIsKey(tr *TR, table *Table, bsontable bool) {
+//对td单元格值判断是否是表头和根据td内容长度进行分块处理
+func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 	lenval := len([]rune(td.Val)) //经过处理的td内容长度
 	//if lentxt > 9 {
 	//td.KV = GetKVAll(txt, "")
 	ub := []*u.Block{}
-	//经过处理的td内容长度大于50
+	//经过处理的td内容长度大于50,划块,分包
 	if lenval > 50 {
 		//看是否划块
 		ub, _ = DivideBlock(td.Text, 2, nil) //对td的原始值
@@ -271,15 +271,15 @@ func (td *TD) tdIsKey(tr *TR, table *Table, bsontable bool) {
 		}*/
 		if len(tr.TDs) > 0 {
 			tdleft := tr.TDs[len(tr.TDs)-1]
-			if tdleft.BH && excludeKey.MatchString(tdleft.Text) {
+			if tdleft.BH && excludeKey.MatchString(tdleft.Text) {//(涉及包号|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)
 				isFindPkg = false
 			}
 		}
 		if isFindPkg {
 			if len(ub) > 0 {
-				blockPackage = FindPackageFromBlocks(&ub, "")
+				blockPackage = FindPackageFromBlocks(&ub, "")//从块里面找分包
 			} else {
-				blockPackage = FindPackageFromText("", td.Val)
+				blockPackage = FindPackageFromText("", td.Val)//从正文里面找分包
 			}
 		}
 		if len(blockPackage) > 0 {
@@ -309,8 +309,8 @@ func (td *TD) tdIsKey(tr *TR, table *Table, bsontable bool) {
 			}
 		}
 	}
-	//经过处理的td内容长度小于等于50
-	if lenval <= 50 {
+	//经过处理的td内容长度小于50,冒号kv,td表头
+	if lenval < 50 {
 		//		td.SortKV = FindKv(text, "")
 		kvTitle := ""
 		if len(td.TR.TDs) > 0 {
@@ -381,7 +381,7 @@ func (td *TD) tdIsKey(tr *TR, table *Table, bsontable bool) {
 		if len(td.TR.TDs) > 0 {
 			kvTitle = td.TR.TDs[len(td.TR.TDs)-1].Val
 		}
-		_, resm := colonkvEntity.entrance(td.Val, kvTitle, 2)
+		_, resm := colonkvEntity.entrance(td.Val, kvTitle, 2)//获取冒号kv入口
 		for k, v := range resm {
 			td.SortKV.AddKey(k, v)
 		}