浏览代码

Merge branch 'dev3.4' of http://39.105.157.10:10080/qmx/jy-data-extract into dev3.4

maxiaoshan 5 年之前
父节点
当前提交
085a07bf0a

+ 2 - 1
src/jy/extract/extpackage.go

@@ -129,6 +129,7 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 				if pkg != nil {
 					sonJobResult["origin"] = pkg.Origin
 					sonJobResult["text"] = pkg.Text
+					sonJobResult["name"] = pkg.Name
 					if pkg.IsTrueBudget{
 						sonJobResult["budget"] = pkg.Budget
 					}
@@ -174,7 +175,7 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 							}
 						}
 					}
-					if sonJobResult["name"] == nil {
+					if sonJobResult["name"] == nil ||sonJobResult["name"] == ""{
 						sonJobResult["name"] = j.Title
 					}
 				}

+ 21 - 10
src/jy/extract/extract.go

@@ -27,12 +27,12 @@ import (
 var (
 	lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
 
-	cut           = ju.NewCut()                          //获取正文并清理
-	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask                //任务列表
-	ClearTaskList map[string]*ClearTask                  //清理任务列表
-	saveLimit     = 100                                  //抽取日志批量保存
-	PageSize      = 5000                                 //查询分页
+	cut     = ju.NewCut()                          //获取正文并清理
+	ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList      map[string]*ExtractTask          //任务列表
+	ClearTaskList map[string]*ClearTask            //清理任务列表
+	saveLimit     = 100                            //抽取日志批量保存
+	PageSize      = 5000                           //查询分页
 	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -908,9 +908,19 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 		for k, vbpkg := range j.BlockPackage {
 			rep := map[string]string{}
 			if in.RegCore.Bextract { //正则是两部分的,可以直接抽取的(含下划线)
-				if !((in.Field == "budget" && vbpkg.Budget > 0) || (in.Field == "bidamount" && vbpkg.Bidamount > 0) ||
-					(in.Field == "winner" && vbpkg.Winner == "") || (in.Field == "bidstatus" && vbpkg.BidStatus == "") ||
-					(in.Field == "projectname" && vbpkg.Name == "")) {
+				if in.Field == "budget" && vbpkg.Budget > 0 {
+					continue
+				}
+				if in.Field == "bidamount" && vbpkg.Bidamount > 0 {
+					continue
+				}
+				if in.Field == "winner" && vbpkg.Winner != "" {
+					continue
+				}
+				if in.Field == "bidstatus" && vbpkg.BidStatus != "" {
+					continue
+				}
+				if in.Field == "projectname" && vbpkg.Name != "" {
 					continue
 				}
 				//处理正负数修正
@@ -1860,6 +1870,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			}
 		} else { //测试结果
 			delete(tmp, "_id")
+			//delete(tmp, "fieldall")
 			if len(j.BlockPackage) > 0 { //分包详情
 				if len(j.BlockPackage) > 10 {
 					tmp["epackage"] = "分包异常"
@@ -2058,7 +2069,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 {                            //reids未找到,执行规则匹配
+	if i == 0 { //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库

+ 2 - 2
src/jy/pretreated/analystep.go

@@ -351,11 +351,11 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job, i
 			if tv.WinnerOrder != nil && len(tv.WinnerOrder) > 0 {
 				bv.WinnerOrder = tv.WinnerOrder
 			}
-			if tv.Bidamount > 0 && bv.Bidamount == 0 {
+			if tv.Bidamount >= 0 && tv.IsTrueBidamount {
 				bv.Bidamount = tv.Bidamount
 				bv.IsTrueBidamount = tv.IsTrueBidamount
 			}
-			if tv.Budget >0 && bv.Bidamount == 0{
+			if tv.Budget >= 0 && tv.IsTrueBudget {
 				bv.Budget = tv.Budget
 				bv.IsTrueBudget = tv.IsTrueBudget
 			}

+ 78 - 5
src/jy/pretreated/analytable.go

@@ -44,11 +44,12 @@ var (
 	FilterKey_2 = regexp.MustCompile("招标|投标|项目")
 	//根据表格的key进行分包打分
 	FindKey_2 = regexp.MustCompile("([分子][包标](号)?|标[号项段包](划分)?|包件?[号段名数])")
+	FindKey_3 = regexp.MustCompile("(标段编号)")
 	//对值进行分包判断
 	FindVal_1  = regexp.MustCompile("[第]?([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)((子|合同|分|施工|监理)?(标段?|包|合同段|标包))|((子|合同|分|施工|监理)?(标|包)(段|号)?)[  \u3000\u2003\u00a0]*([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)")
 	FindVal2_1 = regexp.MustCompile("([一二三四五六七八九十0-9A-Za-zⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ\\-]+)|^(设计|施工|监理|验收)[分子]?[标包]?[段号]?$")
 	//判断分包前排除
-	excludeKey = regexp.MustCompile("(涉及包号|分包数量|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)") //编号|划分
+	excludeKey = regexp.MustCompile("(标段代码|涉及包号|分包数量|包件号?|项目标号|规格|型号|招标范围|业绩|废标)|(^编号$)|([^包段标]编号)") //编号|划分
 	//-------------
 
 	cut = u.NewCut()
@@ -605,6 +606,15 @@ func (table *Table) MergerToTableresult() {
 						}
 					}
 				}
+				if bp.Bidamount <= 0 && !bp.IsTrueBidamount {
+					bp.Bidamount = v1.Bidamount
+					bp.IsTrueBidamount = v1.IsTrueBidamount
+				}
+				if bp.Budget <= 0 && !bp.IsTrueBudget {
+					bp.Budget = v1.Budget
+					bp.IsTrueBudget = v1.IsTrueBudget
+				}
+				bp.Text += bp.Text
 				if len(v1.WinnerOrder) > 0 && len(bp.WinnerOrder) == 0 {
 					bp.WinnerOrder = v1.WinnerOrder
 				}
@@ -2035,7 +2045,42 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool, codeSite string) (b bool,
 					bp.Index = v             //序号 (转换后编号,只有数字或字母)
 					bp.Origin = oldIndex[nk] //包的原始值
 					bp.TableKV = kv          //table kv (分出的对应的KV值)
-					bp.Text = tn.Html
+					bp.Name = v
+					if bp.TableKV != nil && bp.TableKV.KvTags != nil && len(bp.TableKV.KvTags) > 0 {
+						for kc, cv := range bp.TableKV.KvTags {
+							if kc == "预算" && bp.Budget <= 0 {
+								moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
+								if len(moneys) > 0 {
+									if vf, ok := moneys[0].(float64); ok {
+										bp.Budget = vf
+										bp.IsTrueBudget = moneys[len(moneys)-1].(bool)
+									} else if vi, ok := moneys[0].(int); ok {
+										bp.Budget = float64(vi)
+										bp.IsTrueBudget = moneys[len(moneys)-1].(bool)
+									}
+								}
+							} else if kc == "中标金额" && bp.Bidamount <= 0 {
+								moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
+								if len(moneys) > 0 {
+									if vf, ok := moneys[0].(float64); ok {
+										bp.Bidamount = vf
+										bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
+									} else if vi, ok := moneys[0].(int); ok {
+										bp.Bidamount = float64(vi)
+										bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
+									}
+								}
+							} else if kc == "中标单位" && bp.Winner == "" {
+								bp.Winner = cv[0].Value
+							}
+							//拼接内容
+							if !excludeKey.MatchString(kc) {
+								bp.Text += fmt.Sprintf("%v:%v\n", kc, cv[0].Value)
+							}
+
+						}
+
+					}
 					tn.BlockPackage.AddKey(v, bp) //table子包数组
 				}
 			}
@@ -2132,7 +2177,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 				if vcgdw.Value == "采购单位" {
 				} else if vcgdw.Value == "预算" && len(val) == len(index) {
 					for bi, bv := range index {
-						if tn.BlockPackage.Map[bv].(*u.BlockPackage).Budget == 0 {
+						if tn.BlockPackage.Map[bv].(*u.BlockPackage).Budget <= 0 {
 							moneys := clear.ObjToMoney([]interface{}{val[bi], ""})
 							if len(moneys) > 0 {
 								if vf, ok := moneys[0].(float64); ok {
@@ -2147,7 +2192,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 					}
 				} else if vcgdw.Value == "中标金额" && len(val) == len(index) {
 					for bi, bv := range index {
-						if tn.BlockPackage.Map[bv].(*u.BlockPackage).Bidamount == 0 {
+						if tn.BlockPackage.Map[bv].(*u.BlockPackage).Bidamount <= 0 {
 							moneys := clear.ObjToMoney([]interface{}{val[bi], ""})
 							if len(moneys) > 0 {
 								if vf, ok := moneys[0].(float64); ok {
@@ -2371,6 +2416,12 @@ func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, inde
 					index_pos = append(index_pos, in2)
 					val += 1
 					pac++
+				} else if FindKey_3.MatchString(k) {
+					//5db2a101a5cb26b9b73054ac
+					index = append(index, v1)
+					index_pos = append(index_pos, in2)
+					val += 1
+					pac++
 				} else {
 					if ok, v1new := isHasOnePkgAndNoKv(v1); ok { //td的值里面有一个包,并且没有冒号kv
 						haspkgs = append(haspkgs, v1new)
@@ -2431,6 +2482,28 @@ func (tn *Table) assemblePackage(k1, v1, key string, isSite bool, codeSite strin
 		for k3, v3 := range kvTags {
 			if bp.TableKV.KvTags[k3] == nil {
 				bp.TableKV.KvTags[k3] = append(bp.TableKV.KvTags[k3], v3...)
+			} else if k3 == "预算" && bp.Budget <= 0 {
+				moneys := clear.ObjToMoney([]interface{}{v3[0].Value, ""})
+				if len(moneys) > 0 {
+					if vf, ok := moneys[0].(float64); ok {
+						bp.Budget = vf
+						bp.IsTrueBudget = moneys[len(moneys)-1].(bool)
+					} else if vi, ok := moneys[0].(int); ok {
+						bp.Budget = float64(vi)
+						bp.IsTrueBudget = moneys[len(moneys)-1].(bool)
+					}
+				}
+			} else if k3 == "中标金额" && bp.Bidamount <= 0 {
+				moneys := clear.ObjToMoney([]interface{}{v3[0].Value, ""})
+				if len(moneys) > 0 {
+					if vf, ok := moneys[0].(float64); ok {
+						bp.Bidamount = vf
+						bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
+					} else if vi, ok := moneys[0].(int); ok {
+						bp.Bidamount = float64(vi)
+						bp.IsTrueBidamount = moneys[len(moneys)-1].(bool)
+					}
+				}
 			}
 		}
 	}
@@ -3339,7 +3412,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
 	for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
 		val := table.SortKV.Map[key]
 		key = regReplAllSpace.ReplaceAllString(key, "")
-		key = strings.Replace(key, "", "", -1)    //处理一个特殊的采购量 经上层处理空格后未处理掉
+		key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
 		if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
 			/*
 				{

+ 90 - 28
src/jy/pretreated/division.go

@@ -2,6 +2,7 @@ package pretreated
 
 import (
 	"fmt"
+	"jy/clear"
 	"jy/util"
 
 	qutil "qfw/util"
@@ -55,7 +56,8 @@ var (
 	regStartWrap       = regexp.MustCompile("^[\r\n]")
 	regEndWrap         = regexp.MustCompile("[\r\n]$")
 	regMoreWrap        = regexp.MustCompile("[\r\n]{2,}")
-	regStrWrap        = regexp.MustCompile("分包名称[::]")
+	regStrWrap         = regexp.MustCompile("分包名称[::]")
+	regBZJWarap        = regexp.MustCompile("保证金.*")
 	replSerial         = regexp.MustCompile("(\r\n|^)([\\d一二三四五六七八九十][、..::,])+\\d")
 	moreColonReg       = regexp.MustCompile("[::]+")
 	regFilter          = regexp.MustCompile("等$")
@@ -90,7 +92,7 @@ var (
 )
 
 //分块
-func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock,isSite bool,codeSite string) ([]*util.Block, int) {
+func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock, isSite bool, codeSite string) ([]*util.Block, int) {
 	defer qutil.Catch()
 	returnValue := 0
 	var blocks []*util.Block
@@ -270,7 +272,7 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock,isSite
 		}
 		block.Title = title
 		block.Titles = titles
-		if ruleBlock != nil{
+		if ruleBlock != nil {
 			block.Classify, block.NotClassifyTitles = ruleBlock.Classify.GetClassify(tp, titles)
 		}
 		tagsToBlocks(blocks, block)
@@ -304,8 +306,8 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock,isSite
 	for _, bl := range returnBlocks {
 		//解析kv
 		newText := TextAfterRemoveTable(bl.Text)
-		bl.ColonKV = GetKVAll(newText, bl.Title, contactFormat, from,isSite,codeSite)
-		bl.SpaceKV = SspacekvEntity.Entrance(newText, bl.Title, contactFormat,isSite,codeSite)
+		bl.ColonKV = GetKVAll(newText, bl.Title, contactFormat, from, isSite, codeSite)
+		bl.SpaceKV = SspacekvEntity.Entrance(newText, bl.Title, contactFormat, isSite, codeSite)
 		//正则抽取的时候有时需要匹配换行或者句号,这里在解析完kv之后,在块结尾添加换行和句号
 		bl.Text = appendWarpStop(bl.Text)
 	}
@@ -548,7 +550,7 @@ func filterTitle(title string) string {
 }
 
 //从块里面找分包
-func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool,codeSite string) (blockPackage map[string]*util.BlockPackage) {
+func FindPackageFromBlocks(blocks *[]*util.Block, isSite bool, codeSite string) (blockPackage map[string]*util.BlockPackage) {
 	blockPackage = map[string]*util.BlockPackage{}
 	//块分包
 	for _, v := range *blocks {
@@ -559,7 +561,7 @@ func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool,codeSite string) (b
 		}
 		//var ok bool
 		//var surplusText string
-			divisionPackageChild(&blockPackage, text, v.Title, true, v.Tag["中标单位"],isSite,codeSite)
+		divisionPackageChild(&blockPackage, text, v.Title, true, v.Tag["中标单位"], isSite, codeSite)
 		////把分包内容摘除掉有问题 有的项目名称中包含二标段
 		//if ok && false {
 		//	v.Text = surplusText
@@ -567,23 +569,81 @@ func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool,codeSite string) (b
 		//	v.SpaceKV = SspacekvEntity.Entrance(surplusText, v.Title, nil)
 		//}
 	}
+	for k, v := range blockPackage {
+		if v.ColonKV != nil && v.ColonKV.KvTags != nil {
+			for kc, cv := range v.ColonKV.KvTags {
+				if kc == "预算" && v.Budget <= 0 {
+					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
+					if len(moneys) > 0 {
+						if vf, ok := moneys[0].(float64); ok {
+							blockPackage[k].Budget = vf
+							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
+						} else if vi, ok := moneys[0].(int); ok {
+							blockPackage[k].Budget = float64(vi)
+							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
+						}
+					}
+				} else if kc == "中标金额" && v.Bidamount <= 0 {
+					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
+					if len(moneys) > 0 {
+						if vf, ok := moneys[0].(float64); ok {
+							blockPackage[k].Bidamount = vf
+							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
+						} else if vi, ok := moneys[0].(int); ok {
+							blockPackage[k].Bidamount = float64(vi)
+							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
+						}
+					}
+				}
+			}
+		}
+		if v.SpaceKV != nil && v.SpaceKV.KvTags != nil {
+			for kc, cv := range v.SpaceKV.KvTags {
+				if kc == "预算" && v.Budget <= 0 {
+					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
+					if len(moneys) > 0 {
+						if vf, ok := moneys[0].(float64); ok {
+							blockPackage[k].Budget = vf
+							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
+						} else if vi, ok := moneys[0].(int); ok {
+							blockPackage[k].Budget = float64(vi)
+							blockPackage[k].IsTrueBudget = moneys[len(moneys)-1].(bool)
+						}
+					}
+
+				} else if kc == "中标金额" && v.Bidamount <= 0 {
+					moneys := clear.ObjToMoney([]interface{}{cv[0].Value, ""})
+					if len(moneys) > 0 {
+						if vf, ok := moneys[0].(float64); ok {
+							blockPackage[k].Bidamount = vf
+							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
+						} else if vi, ok := moneys[0].(int); ok {
+							blockPackage[k].Bidamount = float64(vi)
+							blockPackage[k].IsTrueBidamount = moneys[len(moneys)-1].(bool)
+						}
+					}
+				}
+			}
+		}
+	}
 	return
 }
 
 //从正文里面找分包
-func FindPackageFromText(title string, content string,isSite bool,codeSite string) (blockPackage map[string]*util.BlockPackage) {
+func FindPackageFromText(title string, content string, isSite bool, codeSite string) (blockPackage map[string]*util.BlockPackage) {
 	blockPackage = map[string]*util.BlockPackage{}
 	//从正文里面找分包
-	divisionPackageChild(&blockPackage, content, title, true, false,isSite,codeSite)
+	divisionPackageChild(&blockPackage, content, title, true, false, isSite, codeSite)
 	return
 }
 
 //分块之后分包
-func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool,isSite bool,codeSite string) (bool, string) {
+func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool, isSite bool, codeSite string) (bool, string) {
 	//查找知否有分包
 	content = regStrWrap.ReplaceAllString(content, "\n")
 	content = regMoreWrap.ReplaceAllString(content, "\n")
 	content = regEndWrap.ReplaceAllString(content, "")
+	content = regBZJWarap.ReplaceAllString(content, "")
 	con, pkg, flag := CheckMultiPackage(content, title)
 	if !flag {
 		return false, ""
@@ -597,7 +657,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 		if len(pkg) == 1 && strings.HasSuffix(con, v[0]) {
 			return false, ""
 		}
-		is := regexp.MustCompile(v[0]+"[::]*").FindAllStringIndex(con, -1)
+		is := regexp.MustCompile(v[0] + "[::]*").FindAllStringIndex(con, -1)
 		for _, sv := range is {
 			appendWarpIndex = append(appendWarpIndex, sv[0])
 		}
@@ -637,13 +697,13 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 			indexPkgMap[sv[0]] = v[0]
 		}
 		//key在包前面,并且在一行的开头
-		keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)"+pgflag).FindAllStringSubmatchIndex(con, -1)
+		keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)" + pgflag).FindAllStringSubmatchIndex(con, -1)
 		if len(keys) == 0 {
 			//key在包前面,并且key以冒号结尾
-			keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
+			keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])" + pgflag).FindAllStringSubmatchIndex(con, -1)
 		}
 		if len(keys) == 0 {
-			keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
+			keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])" + pgflag).FindAllStringSubmatchIndex(con, -1)
 		}
 		for _, key := range keys {
 			startEndMap[key[5]] = key[4]
@@ -697,7 +757,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 			}
 			index := util.PackageNumberConvert(bk)
 			//去掉前缀,空格必须要加,分kv的时候要用
-			text = regexp.MustCompile(bv[0]+"[::]*").ReplaceAllString(text, "")
+			text = regexp.MustCompile(bv[0] + "[::]*").ReplaceAllString(text, "")
 			headKey := ""
 			if indexKeyStringMap[iv] != "" {
 				//if !filterPkgTitleKey.MatchString(indexKeyStringMap[iv]) {
@@ -706,6 +766,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 				//}
 				for _, pkgIndexMap_v := range pkgIndexMap[bv[0]] {
 					delete(indexKeyStringMap, pkgIndexMap_v)
+					break
 				}
 			}
 			//如果一块中有多个相同的包,合并到一个
@@ -713,31 +774,32 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 				//合并文本
 				(*blockPackage)[index].Text += "\n" + text
 				//合并冒号kv
-				colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1,isSite,codeSite)
+				colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1, isSite, codeSite)
 				if headKey != "" {
-					kvAgain := GetKVAll(text, "", nil, 4,isSite,codeSite)
+					kvAgain := GetKVAll(text, "", nil, 4, isSite, codeSite)
 					MergeKvTags(colonJobKv.KvTags, kvAgain.KvTags)
 				}
 				MergeKvTags((*blockPackage)[index].ColonKV.KvTags, colonJobKv.KvTags)
 				//合并空格kv
-				spaceJobKv := SspacekvEntity.Entrance(text, "", nil,isSite,codeSite)
+				spaceJobKv := SspacekvEntity.Entrance(text, "", nil, isSite, codeSite)
 				MergeKvTags((*blockPackage)[index].SpaceKV.KvTags, spaceJobKv.KvTags)
 			} else {
 				newBpkg := &util.BlockPackage{
 					Origin:   bk,
 					Text:     text,
 					Index:    index,
+					Name:     bv[0],
 					Type:     bv[1],
 					Accuracy: accuracy,
 				}
 				//fmt.Println(text)
-				finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4,isSite,codeSite)
+				finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4, isSite, codeSite)
 				if headKey != "" {
-					kvAgain := GetKVAll(text, "", nil, 4,isSite,codeSite)
+					kvAgain := GetKVAll(text, "", nil, 4, isSite, codeSite)
 					MergeKvTags(finalKv.KvTags, kvAgain.KvTags)
 				}
 				newBpkg.ColonKV = finalKv
-				newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil,isSite,codeSite)
+				newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil, isSite, codeSite)
 				(*blockPackage)[index] = newBpkg
 			}
 		}
@@ -745,7 +807,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 	//中标人排序
 	if isFindWinnerOrder && blockPackage != nil && len(*blockPackage) > 0 {
 		for _, v := range *blockPackage {
-			v.WinnerOrder = winnerOrderEntity.Find(v.Text, true, 2,isSite,codeSite)
+			v.WinnerOrder = winnerOrderEntity.Find(v.Text, true, 2, isSite, codeSite)
 		}
 	}
 	return true, surplusText
@@ -792,21 +854,21 @@ func interceptText(indexs []int, indexPkgMap map[int]string, pkgIndexMap map[str
 		//} else
 		if strings.Contains(text, "\n") {
 			texts := strings.Split(text, "\n")
-			text2 :=""
+			text2 := ""
 			if ik+1 < len(indexs)-1 {
 				if startEndMap[indexs[ik+1+1]] != 0 {
 					text2 = con[startEndMap[indexs[ik+1]]:startEndMap[indexs[ik+1+1]]]
 				} else {
 					text2 = con[indexs[ik+1]:indexs[ik+1+1]]
 				}
-				if texts[len(texts)-1] == text2{
+				if texts[len(texts)-1] == text2 {
 					text = texts[0]
 				}
 			}
 		}
-		if utf8.RuneCountInString(text)<5{
+		if utf8.RuneCountInString(text) < 5 {
 			indexTextMap[iv] = tmptext
-		}else {
+		} else {
 			indexTextMap[iv] = text
 		}
 		warpCount := len(regSpliteSegment.FindAllStringIndex(text, -1))
@@ -876,8 +938,8 @@ func interceptText(indexs []int, indexPkgMap map[int]string, pkgIndexMap map[str
 }
 
 //分块之后的kv
-func kvAfterDivideBlock(tp, text string, from int, ruleBlock *util.RuleBlock,isSite bool,codeSite string) []*util.Kv {
-	blocks, _ := DivideBlock(tp, text, from, ruleBlock,isSite,codeSite)
+func kvAfterDivideBlock(tp, text string, from int, ruleBlock *util.RuleBlock, isSite bool, codeSite string) []*util.Kv {
+	blocks, _ := DivideBlock(tp, text, from, ruleBlock, isSite, codeSite)
 	kvs := []*util.Kv{}
 	for _, v := range blocks {
 		//util.Debug(v.Text)

+ 2 - 2
src/jy/pretreated/multipackage.go

@@ -68,11 +68,11 @@ func CheckMultiPackage(con, title string) (content string, m map[string][]string
 			//log.Println("all: ", k)
 		}
 	}
+	content = con
 	con = PreCheckMulti.ReplaceAllString(con, "")
 	con = PreCon.ReplaceAllString(con, "\n")
-	content = con
 	con = PreCon2.ReplaceAllString(con, "")
-	con = PreCon1.ReplaceAllString(con, "")
+	//con = PreCon1.ReplaceAllString(con, "")
 	res := MultiReg.FindAllStringSubmatch(con, -1)
 	if len(res) > 0 { //5 6
 		mindex := map[string]int{}

+ 9 - 0
udpcreateindex/src/config.json

@@ -42,11 +42,20 @@
         "multiIndex": ""
     },
     "project": {
+		"addr": "172.17.4.189:27082",
+        "size": 2,
         "db": "extract_kf",
         "collect": "huawei_project",
         "index": "projectset_v1",
         "type": "projectset"
     },
+    "project2": {
+
+        "db": "extract_kf",
+        "collect": "huawei_project",
+        "index": "project_v2",
+        "type": "project"
+    },
     "standard": {
  		"addr": "172.17.145.163:27082",
         "size": 10,

+ 11 - 3
udpcreateindex/src/main.go

@@ -16,6 +16,7 @@ var (
 	Sysconfig          map[string]interface{} //配置文件
 	mgo                *mongodb.MongodbSim    //mongodb操作对象
 	extractmgo         *mongodb.MongodbSim    //mongodb操作对象
+	project2db         *mongodb.MongodbSim    //mongodb操作对象
 	mgostandard        *mongodb.MongodbSim    //mongodb操作对象
 	udpclient          mu.UdpClient           //udp对象
 	updport            string
@@ -25,7 +26,7 @@ var (
 	multiIndex         []string
 	BulkSize           = 400
 
-	winner, bidding, biddingback, project, buyer, standard map[string]interface{}
+	winner, bidding, biddingback, project, project2, buyer, standard map[string]interface{}
 )
 
 func init() {
@@ -39,6 +40,7 @@ func init() {
 	bidding, _ = Sysconfig["bidding"].(map[string]interface{})
 	biddingback, _ = Sysconfig["biddingback"].(map[string]interface{})
 	project, _ = Sysconfig["project"].(map[string]interface{})
+	project2, _ = Sysconfig["project2"].(map[string]interface{})
 	mconf, _ := Sysconfig["mongodb"].(map[string]interface{})
 	mgo = &mongodb.MongodbSim{
 		MongodbAddr: mconf["addr"].(string),
@@ -46,7 +48,12 @@ func init() {
 		DbName:      mconf["db"].(string),
 	}
 	mgo.InitPool()
-
+	project2db = &mongodb.MongodbSim{
+		MongodbAddr: project2["addr"].(string),
+		Size:        util.IntAllDef(project2["pool"], 5),
+		DbName:      project2["db"].(string),
+	}
+	project2db.InitPool()
 	savedb, _ := Sysconfig["savedb"].(map[string]interface{})
 	if savedb == nil {
 		log.Println("未设置保存数据库,默认使用招标库")
@@ -91,6 +98,7 @@ func init() {
 }
 
 func main() {
+	go task_projects()
 	updport := Sysconfig["udpport"].(string)
 	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
 	udpclient.Listen(processUdpMsg)
@@ -140,7 +148,7 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					defer func() {
 						<-pool
 					}()
-					projectTask(data, mapInfo)
+					projectTask(data, project, mapInfo)
 				}()
 			case "biddingback": //不联表,使用bidding表直接调用mongo库生成索引
 				pool <- true

+ 10 - 3
udpcreateindex/src/projectindex.go

@@ -9,10 +9,11 @@ import (
 	"qfw/util"
 	elastic "qfw/util/elastic"
 
+	mgov "gopkg.in/mgo.v2"
 	"gopkg.in/mgo.v2/bson"
 )
 
-func projectTask(data []byte, mapInfo map[string]interface{}) {
+func projectTask(data []byte, project, mapInfo map[string]interface{}) {
 	defer util.Catch()
 	q, _ := mapInfo["query"].(map[string]interface{})
 	if q == nil {
@@ -23,8 +24,14 @@ func projectTask(data []byte, mapInfo map[string]interface{}) {
 			},
 		}
 	}
-	session := extractmgo.GetMgoConn(3600)
-	defer extractmgo.DestoryMongoConn(session)
+	var session *mgov.Session
+	if project["addr"] != nil {
+		session = project2db.GetMgoConn(3600)
+		defer project2db.DestoryMongoConn(session)
+	} else {
+		session = extractmgo.GetMgoConn(3600)
+		defer extractmgo.DestoryMongoConn(session)
+	}
 	c, _ := project["collect"].(string)
 	db, _ := project["db"].(string)
 	index, _ := project["index"].(string)

+ 28 - 0
udpcreateindex/src/task.go

@@ -0,0 +1,28 @@
+// task定时执行项目索引
+package main
+
+import (
+	"log"
+	"time"
+
+	"github.com/cron"
+)
+
+// task_projects registers the periodic project-index job on a new cron
+// scheduler and starts that scheduler.
+//
+// The job is registered with the six-field spec "20 30 5 * * *"
+// (NOTE(review): presumably seconds-first, i.e. 05:30:20 every day — confirm
+// against the vendored github.com/cron parser). On each run it computes the
+// Unix timestamp of local midnight for the current day and calls projectTask
+// with the project2 configuration and a query selecting documents whose
+// "pici" field lies between that timestamp minus 86400 seconds and that
+// timestamp, both inclusive.
+func task_projects() {
+	c := cron.New()
+	// The registration error used to be discarded with `_ =`, so a rejected
+	// spec left the job silently unscheduled. Report it instead.
+	// NOTE(review): assumes AddFunc's single return value is an error, as the
+	// original one-value assignment suggests — confirm against the vendored package.
+	if err := c.AddFunc("20 30 5 * * *", func() {
+		now := time.Now()
+		// Local midnight of the current day, as a Unix timestamp.
+		pici := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.Local).Unix()
+		log.Println(pici)
+		mapInfo := map[string]interface{}{
+			"query": map[string]interface{}{
+				"pici": map[string]interface{}{
+					"$gte": pici - 86400,
+					"$lte": pici,
+				},
+			},
+		}
+		// The empty byte slice matches the original call; this path has no UDP payload.
+		projectTask([]byte{}, project2, mapInfo)
+	}); err != nil {
+		log.Println("task_projects: failed to schedule project index job:", err)
+		// Nothing was registered, so there is no point starting the scheduler.
+		return
+	}
+	c.Start()
+}