fengweiqiang vor 5 Jahren
Ursprung
Commit
eab4b08b55

+ 4 - 1
src/jy/extract/extpackage.go

@@ -148,6 +148,9 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 					sonJobResult["origin"] = pkg.Origin
 					sonJobResult["text"] = pkg.Text
 					sonJobResult["name"] = pkg.Name
+					if pkg.Winner!= ""{
+						sonJobResult["winner"] = pkg.Winner
+					}
 					if pkg.WinnerPerson != "" {
 						sonJobResult["winnertel"] = pkg.WinnerTel
 						sonJobResult["winnerperson"] = pkg.WinnerPerson
@@ -177,7 +180,7 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 							}
 						}
 					} else {
-						if len(j.Winnerorder) > 0 {
+						if sonJobResult["winner"] == "" && len(j.Winnerorder) > 0 {
 							if j.Winnerorder[0]["price"] != nil {
 								sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
 							}

+ 20 - 0
src/jy/extract/extract.go

@@ -930,6 +930,9 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 				if in.Field == "projectname" && vbpkg.Name != "" {
 					continue
 				}
+				if in.Field == "winner" && vbpkg.Winner != "" {
+					continue
+				}
 				if in.Field == "winnerperson" {
 					if vbpkg.Winner == "" || len(vbpkg.Winner) < 4 {
 						continue
@@ -1769,6 +1772,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			if (savewinner == nil || len(savewinner) == 0) && tmp["winner"] != nil {
 				tmp["s_winner"] = tmp["winner"]
 			} else if savewinner != nil {
+				savewinner = RemoveReplicaSliceString(savewinner)
 				tmp["s_winner"] = strings.Join(savewinner, ",")
 			}
 
@@ -1777,6 +1781,11 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			tmp["s_winner"] = tmp["winner"]
 		}
 		if len(j.Winnerorder) > 0 { //候选人信息
+			for i,v := range j.Winnerorder{
+				if v["price"]!= nil{
+					j.Winnerorder[i]["price"] = clear.ObjToMoney([]interface{}{v["price"],""})[0]
+				}
+			}
 			tmp["winnerorder"] = j.Winnerorder
 		}
 		//处理附件
@@ -2256,3 +2265,14 @@ func resetWinnerorder(j *ju.Job) {
 	//j.Result["bidamount"] = bidamounts
 
 }
// RemoveReplicaSliceString returns a new slice containing the elements of slc
// with duplicates removed. The first occurrence of each value is kept and the
// original relative order is preserved. slc itself is not modified, and the
// result is never nil, even when slc is nil or empty.
func RemoveReplicaSliceString(slc []string) []string {
	// At most len(slc) distinct values can exist, so size both containers once.
	result := make([]string, 0, len(slc))
	seen := make(map[string]struct{}, len(slc))
	for _, s := range slc {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		result = append(result, s)
	}
	return result
}

+ 2 - 1
src/jy/pretreated/analystep.go

@@ -53,6 +53,7 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 		//log.Println(con)
 		bl := &util.Block{}
 		newCon := con
+		//log.Println(con)
 		if len(tabs) > 0 { //解析表格逻辑
 			job.HasTable = 1 //添加标识:文本中有table
 			newCon = TextAfterRemoveTable(con)
@@ -105,7 +106,7 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 										job.BlockPackage[k].WinnerOrder = append(job.BlockPackage[k].WinnerOrder, map[string]interface{}{
 											"type":    0,
 											"price":   0.0,
-											"entname": vv.Value,
+											"entname": strings.TrimSpace(vv.Value),
 											"sort":    tmpw,
 										})
 										tmpw++

+ 2 - 2
src/jy/pretreated/analytable.go

@@ -49,7 +49,7 @@ var (
 	FindVal_1  = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|合同|分|施工|监理)?(标|包)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
 	FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
 	//判断分包前排除
-	excludeKey  = regexp.MustCompile("(标段代码|涉及包号|分包数量|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)") //编号|划分
+	excludeKey  = regexp.MustCompile("(标段代码|涉及包号|分包数量|包件号?|项目标号|规格|型号|招标范围|业绩|废标|标段选择要求)|(^编号$)|([^包段标]编号)") //编号|划分
 	excludeKey2 = regexp.MustCompile("包/[0-9]{0,4}[|箱|纸|张]")
 	//-------------
 
@@ -2324,7 +2324,7 @@ func (tn *Table) isGoonNext(isSite bool, codeSite string) {
 func foundPacBySortKV(tn *Table, val int, index []string, index_pos []int, keyExistsCount *map[string]int, commonKeyVals *map[string][]string, key_index int, hasPkgTd map[string]bool) (rval int, rindex []string, rindex_pos []int) {
 	keyIsPkg := false
 	for in, k := range tn.SortKV.Keys {
-		if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) { //判断分包前排除
+		if excludeKey.MatchString(BracketsTextReg.ReplaceAllString(k, "")) ||regPDFWarap.MatchString(k)||regAZWarap.MatchString(k){ //判断分包前排除
 			continue
 		}
 		v := tn.SortKV.Map[k]

+ 82 - 103
src/jy/pretreated/division.go

@@ -57,10 +57,15 @@ var (
 	regMoreWrap        = regexp.MustCompile("[\r\n]{2,}")
 	regStrWrap         = regexp.MustCompile("分包名称[::]")
 	regBZJWarap        = regexp.MustCompile("保证金.*")
+	regPDFWarap        = regexp.MustCompile("[a-zA-Z](包|标段).(pdf|PDF)")
+	regAZWarap         = regexp.MustCompile("(标[a-zA-Z]取值|标段划分)")
 	replSerial         = regexp.MustCompile("(\r\n|^)([\\d一二三四五六七八九十][、..::,])+\\d")
 	moreColonReg       = regexp.MustCompile("[::]+")
 	regFilter          = regexp.MustCompile("等$")
 	pkgFilter          = regexp.MustCompile("第[一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ](子|合同|分|施工|监理)?(标段?|包|合同段|标包)|(子|合同|分|施工|监理)?(标|包)(段|号)?")
+	indexTile          = regexp.MustCompile("[0-9.]{2,3}[\\s\u4e00-\u9fa5]{2,8}[::]+") //小标题
+	indexTile2         = regexp.MustCompile("[\\s\u4e00-\u9fa5]{2,8}")
+	regReplAllSpace2   = regexp.MustCompile("[\u3000\u2003\u00a0\\s0-9.::、\\(\\)]+")
 	confusion          = map[string]string{
 		"参与": "canyu",
 	}
@@ -566,63 +571,71 @@ func FindPackageFromBlocks(blocks *[]*util.Block, isSite bool, codeSite string)
 	//orderwinner := winnerOrderEntity.Find(content, true, 2, isSite, codeSite)
 
 	for k, v := range blockPackage {
-		if v.ColonKV != nil && v.ColonKV.KvTags != nil {
-			for kc, cv := range v.ColonKV.KvTags {
-				if kc == "预算" && v.Budget <= 0 {
-					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
-					if len(moneys) > 0 {
-						if vf, ok := moneys[0].(float64); ok {
-							blockPackage[k].Budget = vf
-							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
-						} else if vi, ok := moneys[0].(int); ok {
-							blockPackage[k].Budget = float64(vi)
-							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
-						}
+		findWinnerBugetBidmountByKv(v, blockPackage, k)
+	}
+	return
+}
+
// findWinnerBugetBidmountByKv back-fills the budget, bid amount and winner of
// the package stored at blockPackage[k] from the key/value tags parsed for v.
//
// It walks v.ColonKV.KvTags and then v.SpaceKV.KvTags (each only when non-nil)
// and handles three tag keys, always using only the first value (cv[0]):
//   - "预算": when v.Budget <= 0 the value is passed through clear.ObjToMoney;
//     a float64 or int first element is stored in Budget, and the last
//     element, asserted as bool, is stored in IsTrueBudget.
//   - "中标金额": same handling, stored in Bidamount / IsTrueBidamount when
//     v.Bidamount <= 0.
//   - "中标单位": copied as-is into Winner when v.Winner is empty.
//
// Conditions are read from v while results are written to blockPackage[k]; the
// callers visible in this change pass v taken from ranging over blockPackage
// with key k, so both refer to the same package there.
// NOTE(review): cv[0] is indexed without a length check and the .(bool)
// assertions are unchecked, so an empty tag list or a non-bool trailing
// element from ObjToMoney would panic — confirm neither can occur.
+func findWinnerBugetBidmountByKv(v *util.BlockPackage, blockPackage map[string]*util.BlockPackage, k string) {
+	if v.ColonKV != nil && v.ColonKV.KvTags != nil {
+		for kc, cv := range v.ColonKV.KvTags {
+			if kc == "预算" && v.Budget <= 0 {
+				moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
+				if len(moneys) > 0 {
+					if vf, ok := moneys[0].(float64); ok {
+						blockPackage[k].Budget = vf
+						blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
+					} else if vi, ok := moneys[0].(int); ok {
+						blockPackage[k].Budget = float64(vi)
+						blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
 					}
-				} else if kc == "中标金额" && v.Bidamount <= 0 {
-					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
-					if len(moneys) > 0 {
-						if vf, ok := moneys[0].(float64); ok {
-							blockPackage[k].Bidamount = vf
-							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
-						} else if vi, ok := moneys[0].(int); ok {
-							blockPackage[k].Bidamount = float64(vi)
-							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
-						}
+				}
+			} else if kc == "中标金额" && v.Bidamount <= 0 {
+				moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
+				if len(moneys) > 0 {
+					if vf, ok := moneys[0].(float64); ok {
+						blockPackage[k].Bidamount = vf
+						blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
+					} else if vi, ok := moneys[0].(int); ok {
+						blockPackage[k].Bidamount = float64(vi)
+						blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
 					}
 				}
+			} else if kc == "中标单位" && v.Winner == "" {
+				blockPackage[k].Winner = cv[0].Value
 			}
 		}
-		if v.SpaceKV != nil && v.SpaceKV.KvTags != nil {
-			for kc, cv := range v.SpaceKV.KvTags {
-				if kc == "预算" && v.Budget <= 0 {
-					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
-					if len(moneys) > 0 {
-						if vf, ok := moneys[0].(float64); ok {
-							blockPackage[k].Budget = vf
-							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
-						} else if vi, ok := moneys[0].(int); ok {
-							blockPackage[k].Budget = float64(vi)
-							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
-						}
+	}
+	if v.SpaceKV != nil && v.SpaceKV.KvTags != nil {
+		for kc, cv := range v.SpaceKV.KvTags {
+			if kc == "预算" && v.Budget <= 0 {
+				moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
+				if len(moneys) > 0 {
+					if vf, ok := moneys[0].(float64); ok {
+						blockPackage[k].Budget = vf
+						blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
+					} else if vi, ok := moneys[0].(int); ok {
+						blockPackage[k].Budget = float64(vi)
+						blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
 					}
+				}
 
-				} else if kc == "中标金额" && v.Bidamount <= 0 {
-					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
-					if len(moneys) > 0 {
-						if vf, ok := moneys[0].(float64); ok {
-							blockPackage[k].Bidamount = vf
-							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
-						} else if vi, ok := moneys[0].(int); ok {
-							blockPackage[k].Bidamount = float64(vi)
-							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
-						}
+			} else if kc == "中标金额" && v.Bidamount <= 0 {
+				moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
+				if len(moneys) > 0 {
+					if vf, ok := moneys[0].(float64); ok {
+						blockPackage[k].Bidamount = vf
+						blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
+					} else if vi, ok := moneys[0].(int); ok {
+						blockPackage[k].Bidamount = float64(vi)
+						blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
 					}
 				}
+			} else if kc == "中标单位" && v.Winner == "" {
+				blockPackage[k].Winner = cv[0].Value
 			}
 		}
 	}
-	return
 }
 
 //从正文里面找分包
@@ -631,61 +644,7 @@ func FindPackageFromText(title string, content string, isSite bool, codeSite str
 	//从正文里面找分包
 	divisionPackageChild(&blockPackage, content, title, true, false, isSite, codeSite)
 	for k, v := range blockPackage {
-		if v.ColonKV != nil && v.ColonKV.KvTags != nil {
-			for kc, cv := range v.ColonKV.KvTags {
-				if kc == "预算" && v.Budget <= 0 {
-					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
-					if len(moneys) > 0 {
-						if vf, ok := moneys[0].(float64); ok {
-							blockPackage[k].Budget = vf
-							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
-						} else if vi, ok := moneys[0].(int); ok {
-							blockPackage[k].Budget = float64(vi)
-							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
-						}
-					}
-				} else if kc == "中标金额" && v.Bidamount <= 0 {
-					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
-					if len(moneys) > 0 {
-						if vf, ok := moneys[0].(float64); ok {
-							blockPackage[k].Bidamount = vf
-							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
-						} else if vi, ok := moneys[0].(int); ok {
-							blockPackage[k].Bidamount = float64(vi)
-							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
-						}
-					}
-				}
-			}
-		}
-		if v.SpaceKV != nil && v.SpaceKV.KvTags != nil {
-			for kc, cv := range v.SpaceKV.KvTags {
-				if kc == "预算" && v.Budget <= 0 {
-					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
-					if len(moneys) > 0 {
-						if vf, ok := moneys[0].(float64); ok {
-							blockPackage[k].Budget = vf
-							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
-						} else if vi, ok := moneys[0].(int); ok {
-							blockPackage[k].Budget = float64(vi)
-							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
-						}
-					}
-
-				} else if kc == "中标金额" && v.Bidamount <= 0 {
-					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
-					if len(moneys) > 0 {
-						if vf, ok := moneys[0].(float64); ok {
-							blockPackage[k].Bidamount = vf
-							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
-						} else if vi, ok := moneys[0].(int); ok {
-							blockPackage[k].Bidamount = float64(vi)
-							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
-						}
-					}
-				}
-			}
-		}
+		findWinnerBugetBidmountByKv(v, blockPackage, k)
 	}
 	//winnerOrderEntity.Find(content, true, 2, isSite, codeSite)
 	return
@@ -694,6 +653,8 @@ func FindPackageFromText(title string, content string, isSite bool, codeSite str
 //分块之后分包
 func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool, isSite bool, codeSite string) (bool, string) {
 	//查找知否有分包
+	content = regPDFWarap.ReplaceAllString(content, "\n")
+	content = regAZWarap.ReplaceAllString(content, "\n")
 	content = regStrWrap.ReplaceAllString(content, "\n")
 	content = regMoreWrap.ReplaceAllString(content, "\n")
 	content = regEndWrap.ReplaceAllString(content, "")
@@ -731,7 +692,6 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 	con = conTemp
 	con = replSerial.ReplaceAllString(con, "\n")
 	con = regMoreWrap.ReplaceAllString(con, "\n")
-	//log.Println(con)
 	//根据分包,找索引位置
 	indexMap := map[int]int{}
 	indexKeyStringMap := map[int]string{}
@@ -740,6 +700,11 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 	startEndMap := map[int]int{}
 	pkgIndexMap := map[string][]int{}
 	indexPkgMap := map[int]string{}
+	//小标题
+	titleindexs := indexTile.FindAllStringIndex(con, -1)
+	if len(titleindexs) == 0 {
+		titleindexs = indexTile2.FindAllStringIndex(con, -1)
+	}
 	//遍历分包,把kv在包前面的移动到包后面
 	for _, v := range pkg {
 		pgflag := v[0] + "[::]*"
@@ -788,12 +753,12 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 			indexKeyStringMap[iv] = indexKeyStringMap[indexs[ik-1]]
 		}
 	}
-	//
 	//获取截取标识
 	surplusText, maxWarpCount, indexTextMap, indexWarpMap := interceptText(indexs, indexPkgMap, pkgIndexMap, startEndMap, con)
 	//查找分包内容,分kv
 	for _, iv := range indexs {
 		text := indexTextMap[iv]
+		tmptext := text
 		//
 		warpIndex := regSpliteSegment.FindAllStringIndex(text, -1)
 		if len(indexWarpMap) > 0 {
@@ -812,6 +777,20 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 			index := util.PackageNumberConvert(bk)
 			//去掉前缀,空格必须要加,分kv的时候要用
 			text = regexp.MustCompile(bv[0] + "[::]*").ReplaceAllString(text, "")
+			if strings.TrimLeft(tmptext, bv[0]) == text || strings.TrimLeft(tmptext, bv[0]+":") == text {
+				var tagtitle string
+				for i, v := range titleindexs {
+					if i == 0 {
+						continue
+					}
+					if v[0] > iv {
+						tagtitle = con[titleindexs[i-1][0]:titleindexs[i-1][1]]
+						break
+					}
+				}
+				tagtitle = regReplAllSpace2.ReplaceAllString(tagtitle, "")
+				text = tagtitle + ":" + text
+			}
 			headKey := ""
 			if indexKeyStringMap[iv] != "" {
 				//if !filterPkgTitleKey.MatchString(indexKeyStringMap[iv]) {
@@ -835,7 +814,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 				}
 				MergeKvTags((*blockPackage)[index].ColonKV.KvTags, colonJobKv.KvTags)
 				//合并空格kv
-				spaceJobKv := SspacekvEntity.Entrance(text, "", nil, isSite, codeSite)
+				spaceJobKv := SspacekvEntity.Entrance(text, headKey, nil, isSite, codeSite)
 				MergeKvTags((*blockPackage)[index].SpaceKV.KvTags, spaceJobKv.KvTags)
 			} else {
 				newBpkg := &util.BlockPackage{

+ 1 - 1
src/jy/pretreated/winnerorder.go

@@ -235,7 +235,7 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 				val := wo.clear("中标单位", v)
 				if val != nil {
 					count++
-					object["entname"] = val
+					object["entname"] = strings.TrimSpace(qutil.ObjToString(val))
 					object["sort"] = wo.toNumber(k, count)
 					object["sortstr"] = thisNumberReg.FindString(k)
 					object["type"] = i