Sfoglia il codice sorgente

Merge branch 'dev3.4' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.4

apple 4 anni fa
parent
commit
fd00ff9b9f

+ 9 - 9
fullproject/src_v1/config.json

@@ -1,5 +1,5 @@
 {
-    "loadStart": 1,
+    "loadStart": 0,
 	"validdays":150,
     "statusdays": 15,
 	"mongodbServers": "192.168.3.207:27092",
@@ -8,16 +8,15 @@
 	"hints":"publishtime_1",
     "extractColl": "jh_info",
     "projectColl": "jh_project",
-    "updateColl": "jh_info",
-    "backupFlag": false,
+    "backupFlag": true,
     "siteColl": "site",
     "thread": 1,
     "jkmail": {
         "to": "wangjianghan@topnet.net.cn",
-        "api": "http://10.171.112.160:19281/_send/_mail"
+        "api": "http://172.17.145.179:19281/_send/_mail"
     },
     "es": {
-        "addr": "http://127.0.0.1:9800",
+        "addr": "http://192.168.3.128:9800",
         "index": "projectset",
         "itype": "projectset",
         "pool": 10
@@ -31,7 +30,7 @@
         },
         {
             "addr": "127.0.0.1",
-            "port": 14833,
+            "port": 1483,
             "memo": "修改项目创建new"
         }
     ],
@@ -54,15 +53,16 @@
         "中班椅", "书架", "书柜", "休息台", "不争辩", "不准确", "不压缩", "不处于", "不挑剔", "不留密码", "中标候选", "中级", "义务", "不存", "东山", "东莞", "中山", "佛山", "南方基地", "不处", "中材", "中美合资", "五效", "万元整", "不漏项", "东进西移",
         "业务", "中级职称", "水土", "成都", "税率为", "合肥", "天津", "保养等", "东湖", "万张图片", "仪表", "中标人", "中标内容", "书桌", "不卡顿", "不跳帧", "不完整", "不组织", "之中", "东建业", "东北特钢", "业主", "不锈钢", "中转",
         "凭借栏杆", "不含", "不含斜", "中杯", "光分纤箱", "光交箱","宜宾", "合柜", "性质", "不及格等", "中央空调", "中检", "乌鲁木齐", "五个环", "乙方", "保密", "保证", "人民币", "无效", "以活动", "从化", "休息室", "位置", "储入", "中间件", "公众",
-        "关键", "化学", "家禽", "水产等", "羊肉", "运行", "金属", "定标", "银行", "本科", "理由", "第五章", "计算机", "通信", "项目管理", "评标", "利益", "香港", "在家", "年度", "不得", "不提供", "中职教材", "以及", "业务", "种类", "从业人员"],
+        "关键", "化学", "家禽", "水产等", "羊肉", "运行", "金属", "定标", "银行", "本科", "理由", "第五章", "计算机", "通信", "项目管理", "评标", "利益", "香港", "在家", "年度", "不得", "不提供", "中职教材", "以及", "业务", "种类", "从业人员", "公告于"],
     "winner": {
         "pre_regexp": ["及报价\\d拟中标人第一:", "中标金额(元)", "第\\d中标候单位:", "第\\d中标侯选人:", "及报价\\d", "成交金额(元)", "成交金额", "成交金额包\\d", "成交金额(元)", "成交金额", "中标供应商为", "</tr><tr><tdcolspan=\"8\">",
             "中标供应商如下:", "中标供应商为", "中标供应商联系地址中标金额(万元)\\d", "基本情况名称:", "基本情况中标候选人第\\d+名:", "______", "第一中标侯选人名称: \\d", "第一中标侯选单位:", "第一中标供应商-", "第一中标排序人:", "第一中标排序人",
             "第一成交侯选人:", "第一成交供应商-", "第一成交候选供应商及报价:", "拟定供应商名称:", "名单及其排序名次第一名单位名称", "名单及其排序名\\d次第一名单位名称", "名单及其排序推荐中标人单位名称", "名单及其排序", "名称:", "是:", "\\(成交\\)", "\\)¥41100.00",
             "\\)为:", "预成交单位:", "最终报价第(二|三)低的", "同意推荐最终报价最低的", "单位名称", "的供应商", "推荐", "第一名", "\\([A-Z]\\)", "([A-Z])", "<[^>]*>", "[^a-zA-Z\\p{Han}]{1,}", "[A-Z](-|、)", "A\\d+", "B\\d+", "?[a-zA-Z]?[包|段]",
-            "①"],
+            "①", "投标人被人民法院[,]{0,}"],
         "back_regexp": ["为中标人$", "为预中标供应商$", "为成交候选人$", "为该项目的预成交单位$", "为成交单位$", "为本次招标项目的中标单位$", "(公司$", "工期:0质量要求:null保证金金额:null$", ",报价:182391224.2900工期$", "786000元$", "预成交人地址",
-            "为该项目的预成交单位$", "为成交单位$", "预成交单位$", ",?(投标报价|报价)\\d{1,}\\.\\d{1,}(万?)元$", ",?(投标报价|报价)\\d+.?\\d+(万?)元$", ",?(投标报价|报价)\\d+.?\\d+(万?)元/平方米$", ",\\[慧通主要产品报价点击下载\\]$", ",$", ",?中标金额\\d{1,}(万?)元$", ",?中标金额\\d{1,}\\.\\d{1,}(万?)元$",
+            "为该项目的预成交单位$", "为成交单位$", "预成交单位$", ",投标人被人民法院$",
+            ",?(投标报价|报价)\\d{1,}\\.\\d{1,}(万?)元$", ",?(投标报价|报价)\\d+.?\\d+(万?)元$", ",?(投标报价|报价)\\d+.?\\d+(万?)元/平方米$", ",\\[慧通主要产品报价点击下载\\]$", ",$", ",?中标金额\\d{1,}(万?)元$",",?中标金额\\d{1,}\\.\\d{1,}(万?)元$",
             ",_$", "单位名称$", ",为本项目.*$", "[^a-zA-Z\\p{Han}]{1,}$"],
         "back_rep_regexp": ["(有限公$)#有限公司", "(有限责任公$)#有限责任公司", "(公司公司&)#公司", "(公司等&)#公司"],
         "blacklist": ["项目废标", "标项内容", "单位名称", "null", "不足3家", "中标单位", "公告时间", "(或印鉴)", "中标金额", "法定代表人", "员会名单", "含全部内", "工期", "报价", "名称得分", "元)备注", "的全部内", "地址", "\\.\\.", "法定家数", "的投标人",

+ 0 - 2
fullproject/src_v1/init.go

@@ -21,7 +21,6 @@ var (
 	Sysconfig                                      map[string]interface{} //读取配置文件
 	MongoTool                                      *MongodbSim            //mongodb连接
 	ExtractColl, ProjectColl, BackupColl, SiteColl string                 //抽取表、项目表、项目快照表、站点表
-	UpdateColl                                     string                 // 金额修改数据表
 	Thread                                         int                    //配置项线程数
 	//NextNode                 []interface{}
 	BlackList    								   []interface{}
@@ -66,7 +65,6 @@ func init() {
 
 	ExtractColl = Sysconfig["extractColl"].(string)
 	ProjectColl = Sysconfig["projectColl"].(string)
-	UpdateColl = Sysconfig["updateColl"].(string)
 	BackupColl = Sysconfig["projectColl"].(string) + "_back"
 	SiteColl = Sysconfig["siteColl"].(string)
 	Thread = util.IntAll(Sysconfig["thread"])

+ 1 - 1
fullproject/src_v1/load_data.go

@@ -18,7 +18,7 @@ func (p *ProjectTask) loadData(starttime int64) {
 	sess := MongoTool.GetMgoConn()
 	defer MongoTool.DestoryMongoConn(sess)
 	q := map[string]interface{}{
-		"lasttime": map[string]interface{}{"$gt": starttime},
+		"lasttime": map[string]interface{}{"$gte": starttime},
 	}
 	it := sess.DB(MongoTool.DbName).C(p.coll).Find(&q).Iter()
 	n := 0

+ 5 - 27
fullproject/src_v1/main.go

@@ -36,7 +36,6 @@ func init() {
 			Port: util.IntAll(m["port"]),
 		})
 	}
-
 	es := Sysconfig["es"].(map[string]interface{})
 	Es = &elastic.Elastic{
 		S_esurl: util.ObjToString(es["addr"]),
@@ -121,28 +120,6 @@ func mainT() {
 	time.Sleep(99999 * time.Hour)
 }
 
-func mainS() {
-	id := "5987e5e85d11e1c745d36c4c"
-	mapinfo := map[string]interface{}{}
-	mapinfo["id"] = id
-	mapinfo["stype"] = "updateMoney"
-	mapinfo["budget"] = "12345"
-	mapinfo["ip"] = "127.0.0.1"
-	mapinfo["port"] = Sysconfig["udpport"]
-	if Sysconfig["loadStart"] != nil {
-		loadStart := util.Int64All(Sysconfig["loadStart"])
-		if loadStart > -1 {
-			P_QL.loadData(loadStart)
-		}
-	}
-	P_QL.loadSite()
-	P_QL.currentType = mapinfo["stype"].(string)
-	P_QL.pici = time.Now().Unix()
-	P_QL.taskUpdateMoney(mapinfo)
-	P_QL.Brun = true
-	time.Sleep(20 * time.Second)
-}
-
 //udp调用信号
 func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 	switch act {
@@ -189,24 +166,25 @@ func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
 					P_QL.pici = time.Now().Unix()
 					P_QL.taskUpdateInfo(mapInfo)
 				}()
-			case "updateMoney": //修改金额
+			case "updatePro": //修改项目外围字段(只修改外围字段值)
 				go func() {
 					defer func() {
 						<-SingleThread
 					}()
 					P_QL.currentType = tasktype
 					P_QL.pici = time.Now().Unix()
-					P_QL.taskUpdateMoney(mapInfo)
+					P_QL.taskUpdatePro(mapInfo)
 				}()
-			case "updateMoneyMgo": //修改金额
+			case "deleteInfo":	// 删除招标公告
 				go func() {
 					defer func() {
 						<-SingleThread
 					}()
 					P_QL.currentType = tasktype
 					P_QL.pici = time.Now().Unix()
-					P_QL.taskQuery()
+					P_QL.delInfoPro(mapInfo)
 				}()
+
 			case "history": //历史数据合并,暂时不写
 				go func() {
 					defer func() {

+ 46 - 21
fullproject/src_v1/project.go

@@ -426,24 +426,6 @@ var FIELDS = []string{
 	"package",
 }
 
-var bidtype = map[string]string{
-	"招标": "招标",
-	"邀标": "邀标",
-	"询价": "询价",
-	"单一": "单一",
-	"竞价": "竞价",
-	"竞谈": "竞谈",
-}
-
-var bidstatus = map[string]string{
-	"预告": "预告",
-	"中标": "中标",
-	"成交": "成交",
-	"废标": "废标",
-	"流标": "流标",
-	"合同": "合同",
-}
-
 //招标时间zbtime、中标时间jgtime、项目状态bidstatus、招标类型bidtype、最后发布时间lasttime、首次发布时间firsttime
 
 func (p *ProjectTask) NewProject(tmp map[string]interface{}, thisinfo *Info) (string, *ProjectInfo) {
@@ -556,7 +538,9 @@ func (p *ProjectTask) NewProject(tmp map[string]interface{}, thisinfo *Info) (st
 	if len(thisinfo.WinnerOrder) > 0 {
 		var list = []string{}
 		for _, v := range thisinfo.WinnerOrder {
-			list = append(list, qu.ObjToString(v["entname"]))
+			if BinarySearch(list, qu.ObjToString(v["entname"])) == -1 {
+				list = append(list, qu.ObjToString(v["entname"]))
+			}
 		}
 		set["winnerorder"] = list
 		p1.Winnerorder = list
@@ -828,7 +812,6 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 			set["district"] = thisinfo.District
 		}
 	}
-	set["district"] = thisinfo.District
 	//6--项目名称
 	if (thisinfo.ProjectName != "" && pInfo.ProjectName == "") || (len([]rune(pInfo.ProjectName)) < 6 && thisinfo.LenPN > 6) {
 		pInfo.ProjectName = thisinfo.ProjectName
@@ -912,7 +895,9 @@ func (p *ProjectTask) UpdateProject(tmp map[string]interface{}, thisinfo *Info,
 	if len(thisinfo.WinnerOrder) > 0 {
 		var list = []string{}
 		for _, v := range thisinfo.WinnerOrder {
-			list = append(list, qu.ObjToString(v["entname"]))
+			if BinarySearch(list, qu.ObjToString(v["entname"])) == -1 {
+				list = append(list, qu.ObjToString(v["entname"]))
+			}
 		}
 		set["winnerorder"] = list
 		pInfo.Winnerorder = list
@@ -1342,3 +1327,43 @@ func IsCreatePro(info *Info) (bol bool) {
 	}
 	return bol
 }
+
+// Identity lookup tables: every key maps to itself, so indexing with an
+// unknown value yields "" and doubles as a membership test.
+var (
+	// bidtype holds the recognised bid-type values; GetBidTypeAndBidStatus
+	// indexes it with Info.TopType.
+	bidtype = map[string]string{
+		"招标": "招标",
+		"邀标": "邀标",
+		"询价": "询价",
+		"单一": "单一",
+		"竞价": "竞价",
+		"竞谈": "竞谈",
+	}
+
+	// bidstatus holds the recognised bid-status values; GetBidTypeAndBidStatus
+	// indexes it with Info.SubType.
+	bidstatus = map[string]string{
+		"预告": "预告",
+		"中标": "中标",
+		"成交": "成交",
+		"废标": "废标",
+		"流标": "流标",
+		"合同": "合同",
+	}
+)
+
+// GetBidTypeAndBidStatus derives the (bid type, bid status) pair for an
+// announcement from its TopType, SubType and Infoformat fields.
+//
+//   - Infoformat == 2 or SubType == "拟建": returns ("", "拟建").
+//   - TopType not present in bidtype: the type falls back to "招标".
+//   - Type "招标": the status is also "招标", regardless of SubType.
+//   - Otherwise the status is bidstatus[SubType], or "其它" when SubType is
+//     not a recognised status.
+func GetBidTypeAndBidStatus(info *Info) (string, string) {
+	// These announcements carry no bid type at all.
+	if info.Infoformat == 2 || info.SubType == "拟建" {
+		return "", "拟建"
+	}
+
+	kind := bidtype[info.TopType]
+	if bidtype[kind] == "" {
+		// Unknown or empty top type defaults to the generic tender type.
+		kind = "招标"
+	}
+	if kind == "招标" {
+		return kind, kind
+	}
+
+	if status := bidstatus[info.SubType]; status != "" {
+		return kind, status
+	}
+	return kind, "其它"
+}

+ 165 - 271
fullproject/src_v1/task.go

@@ -6,6 +6,7 @@ import (
 	"gopkg.in/mgo.v2/bson"
 	"log"
 	mu "mfw/util"
+	"mgoutil/mongodb"
 	"qfw/util"
 	"regexp"
 	"strings"
@@ -13,6 +14,7 @@ import (
 	"time"
 	"unicode/utf8"
 
+	"github.com/goinggo/mapstructure"
 	"github.com/robfig/cron"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 )
@@ -29,12 +31,26 @@ var BackRegexp = map[string][]*regexp.Regexp{}
 var BackRepRegexp = map[string][]RegexpInfo{}
 var BlackRegexp = map[string][]*regexp.Regexp{}
 
+var (
+	// Patterns for extracting a project code from the title.
+	titleGetPc  = regexp.MustCompile("^([-0-9a-zA-Z第号采招政询电审竞#]{8,}[-0-9a-zA-Z#]+)")
+	titleGetPc1 = regexp.MustCompile("[\\[【((](.{0,6}(编号|编码|项号|包号|代码|标段?号)[::为])?([-0-9a-zA-Z第号采招政询电审竞#]{5,}([\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+[\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+)?)[\\]】))]")
+	titleGetPc2 = regexp.MustCompile("([-0-9a-zA-Z第号采政招询电审竞#]{8,}[-0-9a-zA-Z#]+)(.{0,5}公告)?$")
+	// Filter applied to project codes.
+	pcReplace = regexp.MustCompile("([\\[【((〖〔《{﹝{](重|第?[二三四再]次.{0,4})[\\]】))〗〕》}﹞}])$|[\\[\\]【】()()〖〗〔〕《》{}﹝﹞-;{}–  ]+|(号|重|第?[二三四五再]次(招标)?)$|[ __]+|((采购)?项目|采购(项目)?)$")
+	// Project code that is only digits or only letters (underscore and hyphen allowed), at most 4 characters.
+	StrOrNum = regexp.MustCompile("^[0-9_-]{1,4}$|^[a-zA-Z_-]{1,4}$")
+	// Digits only or letters only (underscore and hyphen allowed), any length.
+	StrOrNum2 = regexp.MustCompile("^[0-9_-]+$|^[a-zA-Z_-]+$")
+	// Text contains a package/section word; the tender notice did not identify the packages, so merge into one project.
+	KeyPackage = regexp.MustCompile("[0-9a-zA-Z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ]+.{0,2}(包|段)|(包|段)[0-9a-zA-Z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ]+.{0,2}")
+)
+
 type RegexpInfo struct {
 	regs   *regexp.Regexp
 	repstr string
 }
 
-
 //项目合并对象
 type ProjectTask struct {
 	InitMinTime int64 //最小时间,小于0的处理一次
@@ -322,187 +338,53 @@ func (p *ProjectTask) taskZl(udpInfo map[string]interface{}) {
 //招标字段更新
 func (p *ProjectTask) taskUpdateInfo(udpInfo map[string]interface{}) {
 	defer util.Catch()
-	db := MongoTool.DbName
-	coll, _ := udpInfo["coll"].(string)
-	if coll == "" {
-		coll = ExtractColl
-	}
-	thread := util.IntAllDef(Thread, 4)
-	if thread > 0 {
-		p.thread = thread
+	infoid := udpInfo["infoid"].(string)
+	infoMap := MongoTool.FindById(ExtractColl, infoid)
+	if infoMap["modifyinfo"] == nil {
+		util.Debug("does not exist modifyinfo ---,", infoid)
+		return
 	}
-	q, _ := udpInfo["query"].(map[string]interface{})
-	gtid := udpInfo["gtid"].(string)
-	lteid := udpInfo["lteid"].(string)
-	if q == nil {
-		q = map[string]interface{}{
-			"_id": map[string]interface{}{
-				"$gte": StringTOBsonId(gtid),
-				"$lte": StringTOBsonId(lteid),
-			},
-			"is_m": 1,
-		}
+	client := Es.GetEsConn()
+	defer Es.DestoryEsConn(client)
+	esquery := `{"query": {"bool": {"must": [{"match": {"ids": "`+infoid+`"}}]}}}`
+	data := Es.Get(Index, Itype, esquery)
+	if len(*data) > 0 {
+		pid := util.ObjToString(((*data)[0])["_id"])
+		p.updateJudge(infoMap, pid)
+	}else {
+		util.Debug("not find project---,", infoid)
 	}
-	log.Println("查询语句:", q)
-	p.enter(db, coll, q)
 }
 
-func (p *ProjectTask) taskQuery() {
+func (p *ProjectTask) taskUpdatePro(udpInfo map[string]interface{}) {
 	defer util.Catch()
-	count := 0
-	sess := MongoTool.GetMgoConn()
-	defer MongoTool.DestoryMongoConn(sess)
-	fields := map[string]interface{} {"budget": 1, "bidamount": 1, "package": 1}
-	ms := sess.DB(MongoTool.DbName).C(UpdateColl).Find(map[string]interface{}{}).Select(fields)
-	query := ms.Iter()
-L:
-	for {
-		tmp := make(map[string]interface{})
-		if query.Next(&tmp) {
-			lastid := tmp["_id"]
-			tmp["id"] = tmp["_id"].(primitive.ObjectID).Hex();
-			if count%1000 == 0 {
-				log.Println("current modify", count, lastid)
-			}
-			p.taskUpdateMoney(tmp)
-			count++
-		} else {
-			break L
-		}
+	util.Debug(udpInfo)
+	pid := util.ObjToString(udpInfo["pid"])
+	updateMap := util.ObjToMap(udpInfo["updateField"])
+	if pid == "" || len(*updateMap) == 0 {
+		util.Debug("参数有误")
+		return
 	}
-}
-
-//修改公告信息的预算/中标金额
-func (p *ProjectTask) taskUpdateMoney(udpInfo map[string]interface{}) {
-	defer util.Catch()
-	id := udpInfo["id"].(string)
-	budget := util.Float64All(udpInfo["budget"])
-	bidamount := util.Float64All(udpInfo["bidamount"])
-
-	client := Es.GetEsConn()
-	defer Es.DestoryEsConn(client)
-	esquery := `{"query": {"bool": {"must": [{"term": {"list.infoid": "`+id+`"}}]}}}`
-	data := Es.Get(Index, Itype, esquery)
-	if len(*data) > 0 {
-		pid := util.ObjToString((*data)[0]["_id"])
-		pro := MongoTool.FindById(ProjectColl, pid)
-		if len(pro) == 0 {
-			util.Debug("未找到项目, pid=", pid)
-			return
-		}
-		var info *map[string]interface{}
-		for _, v := range []interface{}(pro["list"].(primitive.A)){
-			v1 := v.(map[string]interface{})
-			if util.ObjToString(v1["infoid"]) == id {
-				info = util.ObjToMap(v)
-				infoField := util.ObjToMap(pro["infofield"])
-				if udpInfo["budget"] != nil{
-					util.Debug("update-------", (*info)["infoid"])
-					//if pro["budget"] == (*info)["budget"] {
-					//	pro["budget"] = budget
-					//}
-					//多包中的金额
-					if util.IntAll(pro["multipackage"]) == 1 {
-						if packages, ok := pro["package"].(map[string]interface{}); ok {
-						M :
-							for k, v := range packages{
-								v1 := []interface{}(v.(primitive.A))
-								for _, v2 := range v1{
-									v3 := v2.(map[string]interface{})
-									if util.ObjToString(v3["infoid"]) == id {
-										if v3["budget"] != nil {
-											pkg := udpInfo["package"].(map[string]interface{})
-											tmp := pkg[k].(map[string]interface{})
-											v3["budget"] = tmp["budget"]
-										}
-									}else {
-										break M
-									}
-								}
-							}
-						}
-					}
-					(*info)["budget"] = budget
-					(*util.ObjToMap((*infoField)[id]))["budget"] = budget
-					if pro["sortprice"] == (*info)["budget"] {
-						pro["sortprice"] = budget
-					}
-				}else {
-					delete(*info, "budget")
-				}
-				if udpInfo["bidamount"] != nil{
-					//if pro["bidamount"] == (*info)["bidamount"] {
-					//	pro["bidamount"] = bidamount
-					//}
-					v1["bidamount"] = bidamount
-					if util.IntAll(pro["multipackage"]) == 1 {
-						if packages, ok := pro["package"].(map[string]interface{}); ok {
-							for k, v := range packages{
-								v1 := []interface{}(v.(primitive.A))
-								for _, v2 := range v1{
-									v3 := v2.(map[string]interface{})
-									if util.ObjToString(v3["infoid"]) == id {
-										if v3["bidamount"] != nil {
-											pkg := udpInfo["package"].(map[string]interface{})
-											tmp := pkg[k].(map[string]interface{})
-											v3["bidamount"] = tmp["bidamount"]
-										}
-									}
-								}
-							}
-						}
-					}
-					(*info)["bidamount"] = bidamount
-					(*util.ObjToMap((*infoField)[id]))["bidamount"] = bidamount
-					if pro["sortprice"] == (*info)["bidamount"] {
-						pro["sortprice"] = bidamount
-					}
-				}else {
-					delete(*info, "bidamount")
-				}
-				break
-			}
-		}
-		var project *ProjectInfo
-		var pInfo *Info
-		bys, _ := json.Marshal(pro)
-		_ = json.Unmarshal(bys, &project)
-		bys1, _ := json.Marshal(info)
-		_ = json.Unmarshal(bys1, &pInfo)
-		if len(project.Ids) > 1 {
-			CountAmount(project, pInfo, *info)
-			if project.Budget > 0 {
-				pro["budget"] = project.Budget
-			}
-			if project.Bidamount > 0 {
-				pro["bidamount"] = project.Bidamount
-			}
-		}else {
-			pro["budget"] = budget
-			pro["bidamount"] = bidamount
-			if budget > bidamount {
-				pro["sortprice"] = budget
+	proMap := MongoTool.FindById(ProjectColl, pid)
+	if len(proMap) > 1 {
+		proMap["reason"] = "直接修改项目字段信息"
+		backupPro(proMap)
+		delete(proMap, "reason")
+		updataMap := make(map[string]interface{})
+		modifyInfo := make(map[string]interface{})
+		for k, v := range *updateMap{
+			if strings.Contains(k, "time") {
+				updataMap[k] = util.Int64All(v)
 			}else {
-				pro["sortprice"] = bidamount
+				updataMap[k] = v
 			}
+			modifyInfo[k] = true
 		}
-		set := map[string]interface{}{
-			"$set": pro,
-		}
-		MongoTool.UpdateById(ProjectColl, pid, set)
-
-		loadStart := util.Int64All(Sysconfig["loadStart"])
-		if loadStart > -1 && project.LastTime >loadStart {
-			util.Debug("内存中存在该项目信息", project.Id)
-			p.AllIdsMapLock.Lock()
-			p.AllIdsMap[pid].P = project
-			p.AllIdsMapLock.Unlock()
-		}
-
-		bol := Es.DelById(Index, Itype, pid)
+		updataMap["modifyinfo"] = modifyInfo
+		util.Debug(updataMap)
+		bol := MongoTool.UpdateById(ProjectColl, pid, map[string]interface{}{"$set": updataMap})
 		if bol {
-			util.Debug("删除es索引, pid------", pid)
-			//调udp生索引
+			//es索引
 			by, _ := json.Marshal(map[string]interface{}{
 				"query": map[string]interface{}{
 					"_id": bson.M{
@@ -514,24 +396,40 @@ func (p *ProjectTask) taskUpdateMoney(udpInfo map[string]interface{}) {
 			util.Debug(string(by))
 			_ = udpclient.WriteUdp(by, mu.OP_TYPE_DATA, toaddr[1])
 		}
+		// 内存
+		var pro ProjectInfo
+		err := mapstructure.Decode(proMap, &pro)
+		if err != nil {
+			util.Debug(err)
+		}
+		p.AllIdsMapLock.Lock()
+		if v, ok := p.AllIdsMap[pid]; ok {
+			v.P = &pro
+		}
+		p.AllIdsMapLock.Unlock()
+	}else {
+		util.Debug("Not find project---", pid)
 	}
 }
 
-func FindMoney(key string, project map[string]interface{}) float64 {
-	money := -0.1
-	for i, v := range []interface{}(project["list"].(primitive.A)){
-		v1 := v.(map[string]interface{})
-		if i == 0 {
-			if v1[key] != nil {
-				money = util.Float64All(v1[key])
-			}
-		}else {
-			if v1[key] != nil && util.Float64All(v1[key]) > money {
-				money = util.Float64All(v1[key])
-			}
-		}
+func (p *ProjectTask) delInfoPro(udpInfo map[string]interface{}) {
+	defer util.Catch()
+	util.Debug(udpInfo)
+	infoid := util.ObjToString(udpInfo["infoid"])
+	if infoid == "" {
+		util.Debug("参数有误")
+		return
+	}
+	client := Es.GetEsConn()
+	defer Es.DestoryEsConn(client)
+	esquery := `{"query": {"bool": {"must": [{"match": {"ids": "`+infoid+`"}}]}}}`
+	data := Es.Get(Index, Itype, esquery)
+	if len(*data) > 0 {
+		pid := util.ObjToString(((*data)[0])["_id"])
+		p.delJudge(infoid, pid)
+	}else {
+		util.Debug("not find project---,", infoid)
 	}
-	return money
 }
 
 func StringTOBsonId(id string) primitive.ObjectID {
@@ -588,13 +486,8 @@ func (p *ProjectTask) enter(db, coll string, q map[string]interface{}) {
 						p.fillInPlace(tmp)
 						info := ParseInfo(tmp)
 						p.currentTime = info.Publishtime
-						if p.currentType == "updateInfo" {
-							//招标信息更改合并
-							p.updateJudge(tmp, info)
-						} else {
-							//普通合并
-							p.CommonMerge(tmp, info)
-						}
+						//普通合并
+						p.CommonMerge(tmp, info)
 					} else {
 						//信息错误,进行更新
 						util.Debug(tmp["_id"])
@@ -649,21 +542,6 @@ L:
 
 }
 
-var (
-	//从标题获取项目编号
-	titleGetPc  = regexp.MustCompile("^([-0-9a-zA-Z第号采招政询电审竞#]{8,}[-0-9a-zA-Z#]+)")
-	titleGetPc1 = regexp.MustCompile("[\\[【((](.{0,6}(编号|编码|项号|包号|代码|标段?号)[::为])?([-0-9a-zA-Z第号采招政询电审竞#]{5,}([\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+[\\[\\]()()][-0-9a-zA-Z第号采招审竞#]+)?)[\\]】))]")
-	titleGetPc2 = regexp.MustCompile("([-0-9a-zA-Z第号采政招询电审竞#]{8,}[-0-9a-zA-Z#]+)(.{0,5}公告)?$")
-	//项目编号过滤
-	pcReplace = regexp.MustCompile("([\\[【((〖〔《{﹝{](重|第?[二三四再]次.{0,4})[\\]】))〗〕》}﹞}])$|[\\[\\]【】()()〖〗〔〕《》{}﹝﹞-;{}–  ]+|(号|重|第?[二三四五再]次(招标)?)$|[ __]+|((采购)?项目|采购(项目)?)$")
-	//项目编号只是数字或只是字母4个以下
-	StrOrNum = regexp.MustCompile("^[0-9_-]{1,4}$|^[a-zA-Z_-]{1,4}$")
-	//纯数字或纯字母
-	StrOrNum2 = regexp.MustCompile("^[0-9_-]+$|^[a-zA-Z_-]+$")
-	//含分包词,招标未识别分包  合并到一个项目
-	KeyPackage = regexp.MustCompile("[0-9a-zA-Z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ]+.{0,2}(包|段)|(包|段)[0-9a-zA-Z一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ]+.{0,2}")
-)
-
 func (p *ProjectTask) CommonMerge(tmp map[string]interface{}, info *Info) {
 	if info != nil && !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) {
 		if jsonData, ok := tmp["jsondata"].(map[string]interface{}); ok {
@@ -828,92 +706,108 @@ func ParseInfo(tmp map[string]interface{}) (info *Info) {
 	return thisinfo
 }
 
-func (p *ProjectTask) updateJudge(tmp map[string]interface{}, info *Info) {
-	index := -1
-	pInfoId := ""
-	p.AllIdsMapLock.Lock()
-	F:
-		for k, ID := range p.AllIdsMap {
-			for i, id := range ID.P.Ids {
-				if info.Id == id {
-					pInfoId = k
-					index = i
-					break F
-				}
+func (p *ProjectTask) updateJudge(infoMap map[string]interface{}, pid string) {
+	tmpPro := MongoTool.FindById(ProjectColl, pid)
+
+	modifyProMap := make(map[string]interface{}) // 修改项目的字段
+	for k := range infoMap{
+		if modifyMap, ok := infoMap["modifyinfo"].(map[string]interface{}); ok {
+			if modifyMap[k] != nil {
+				modifyProMap[k] = infoMap[k]
 			}
 		}
+	}
+	if len(modifyProMap) == 0 {
+		util.Debug("修改招标公告信息不需要修改项目信息字段", infoMap["_id"])
+		return
+	}
+	p.AllIdsMapLock.Lock()
+	_, ok := p.AllIdsMap[pid]
 	p.AllIdsMapLock.Unlock()
-	//未找到招标信息
-	if index == -1 {
-		if info != nil && !((info.pnbval == 1 && info.Buyer != "") || info.pnbval == 0) {
-			p.currentTime = info.Publishtime
-			p.startProjectMerge(info, tmp)
+	ids := []interface{}(tmpPro["ids"].(primitive.A))
+	index, position := -1, 0		// index 0:第一个,1:中间,2:最后一个   position list中位置
+
+	for i, v := range ids {
+		if util.ObjToString(v) == mongodb.BsonIdToSId(infoMap["_id"]) {
+			position = i
+			if i == 0 {
+				index = 0
+			}else if i == len(ids) - 1 {
+				index = 2
+			}else {
+				index = 1
+			}
 		}
-	} else {
-		tmpPro := MongoTool.FindById(ProjectColl, pInfoId)
-		infoList := []interface{}(tmpPro["list"].(primitive.A))
-		infoMap := infoList[index].(map[string]interface{})
-		modifyMap, f := modifyEle(infoMap, tmp)
+	}
+	if ok {
+		// 周期内
 		//projecthref字段
-		jsonData := tmp["jsondata"].(map[string]interface{})
-		if jsonData != nil && jsonData["projecthref"] != nil {
-			proHref := jsonData["projecthref"].(string)
-			tmp["projecthref"] = proHref
-			p.mapHrefLock.Lock()
-			pid := p.mapHref[proHref]
-			p.mapHrefLock.Unlock()
-			if pid == pInfoId {
-				p.modifyUpdate(pInfoId, index, info, tmp, tmpPro, modifyMap)
-				return
+		if infoMap["jsondata"] != nil {
+			jsonData := infoMap["jsondata"].(map[string]interface{})
+			if proHref, ok := jsonData["projecthref"].(string); ok {
+				p.mapHrefLock.Lock()
+				tempId := p.mapHref[proHref]
+				p.mapHrefLock.Unlock()
+				if pid == tempId {
+					p.modifyUpdate(pid, index, position, tmpPro, modifyProMap)
+				}else {
+					util.Debug("projecthref data id err---pid=" + pid, "---"+tempId)
+				}
+			}else {
+				f := modifyEle(modifyProMap)
+				if f {
+					//合并、修改
+					util.Debug("合并修改更新", "----------------------------")
+					p.mergeAndModify(pid, index, position, infoMap, tmpPro, modifyProMap)
+				} else {
+					//修改
+					util.Debug("修改更新", "----------------------------")
+					p.modifyUpdate(pid, index, position, tmpPro, modifyProMap)
+				}
+			}
+		}else {
+			f := modifyEle(modifyProMap)
+			if f {
+				//合并、修改
+				util.Debug("合并修改更新", "----------------------------")
+				p.mergeAndModify(pid, index, position, infoMap, tmpPro, modifyProMap)
+			} else {
+				//修改
+				util.Debug("修改更新", "----------------------------")
+				p.modifyUpdate(pid, index, position, tmpPro, modifyProMap)
 			}
 		}
-
-		if f {
-			//合并、修改
-			log.Println("合并修改更新", "----------------------------")
-			p.mergeAndModify(pInfoId, index, info, tmp, tmpPro, modifyMap)
-		} else {
-			//修改
-			log.Println("修改更新", "----------------------------")
-			p.modifyUpdate(pInfoId, index, info, tmp, tmpPro, modifyMap)
-		}
+	}else {
+		// 周期外
+		p.modifyUpdate(pid, index, position, tmpPro, infoMap)
 	}
 }
 
 var Elements = []string{
 	"projectname",
 	"projectcode",
+	"buyer",
 	"agency",
-	"budget",
-	"bidamount",
-	"buyerperson",
 	"area",
 	"city",
 	"publishtime",
+	"toptype",
+	"subtype",
 }
 
 /**
-判断修改的字段是否是影响合并流程的要素字段
+	修改的字段
+	修改的字段是否是影响合并流程的要素字段
 */
-func modifyEle(tmpPro map[string]interface{}, tmp map[string]interface{}) (map[string]interface{}, bool) {
-	modifyMap := map[string]interface{}{}
-	for k := range tmpPro {
-		for k1 := range tmp {
-			if k == k1 && tmpPro[k] != tmp[k1] {
-				modifyMap[k] = tmp[k1]
-				break
-			}
-		}
-	}
-	for k := range modifyMap {
-		for _, str := range Elements {
-			if k == str {
-				return modifyMap, true
-			}
+func modifyEle(tmp map[string]interface{}) bool {
+	merge := false
+	for _, str := range Elements {
+		if tmp[str] != nil {
+			merge = true
+			break
 		}
 	}
-	delete(modifyMap, "_id")
-	return modifyMap, false
+	return merge
 }
 
 //补全位置信息

File diff suppressed because it is too large
+ 826 - 215
fullproject/src_v1/update.go


+ 37 - 12
fullproject/udp/src/main.go

@@ -7,22 +7,24 @@ import (
 	mu "mfw/util"
 	"net"
 	"os"
-	"time"
 )
 
-var ip, sid, eid, stype string
+var ip, sid, eid, stype, infoid, pid, udpataField string
 var p int
 
 func main() {
 
 	flag.StringVar(&sid, "sid", "", "开始id")
 	flag.StringVar(&eid, "eid", "", "结束id")
+	flag.StringVar(&infoid, "infoid", "", "修改的公告id")
+	flag.StringVar(&pid, "pid", "", "修改的项目id")
+	flag.StringVar(&udpataField, "field", "", "修改的字段信息")
 	flag.StringVar(&ip, "ip", "127.0.0.1", "ip")
 	flag.IntVar(&p, "p", 1482, "端口")
 	flag.StringVar(&stype, "stype", "", "stype")
 	flag.Parse()
 
-	if ip != "" && p > 0 && sid != "" && eid != "" {
+	if stype != ""{
 		addr := &net.UDPAddr{
 			IP:   net.ParseIP(ip),
 			Port: p,
@@ -35,16 +37,39 @@ func main() {
 				os.Exit(0)
 			}
 		})
-		m1 := map[string]interface{}{
-			"gtid":  sid,
-			"lteid": eid,
-			"stype": stype,
+		if stype == "updatePro" {
+			udpatamap := make(map[string]interface{})
+			err := json.Unmarshal([]byte(udpataField), &udpatamap)
+			if err == nil {
+				m1 := map[string]interface{}{
+					"pid":  pid,
+					"updateField": udpatamap,
+					"stype": stype,
+				}
+				by, _ := json.Marshal(m1)
+				log.Println(string(by))
+				udp.WriteUdp(by, mu.OP_TYPE_DATA, addr)
+			}else {
+				log.Println(err)
+			}
+		}else if stype == "deleteInfo" || stype == "updateInfo" {
+			m1 := map[string]interface{}{
+				"infoid":  infoid,
+				"stype": stype,
+			}
+			by, _ := json.Marshal(m1)
+			log.Println(string(by))
+			udp.WriteUdp(by, mu.OP_TYPE_DATA, addr)
+		}else if sid != "" && eid != "" {
+			m1 := map[string]interface{}{
+				"gtid":  sid,
+				"lteid": eid,
+				"stype": stype,
+			}
+			by, _ := json.Marshal(m1)
+			log.Println(string(by))
+			udp.WriteUdp(by, mu.OP_TYPE_DATA, addr)
 		}
-
-		by, _ := json.Marshal(m1)
-		log.Println(string(by))
-		udp.WriteUdp(by, mu.OP_TYPE_DATA, addr)
-		time.Sleep(30 * time.Second)
 	} else {
 		flag.PrintDefaults()
 		log.Println("参数错误.")

+ 1 - 1
projectinfo/src/task.go

@@ -190,7 +190,7 @@ func GetProjectData(sid, eid string) {
 			//id
 			id := mgoutil.BsonIdToSId(pro["_id"])
 			pro["infoid"] = id
-			pro["jyhref"] = `https://www.jianyu360.com/article/content/` + qu.CommonEncodeArticle("content", id) + `.html`
+			pro["jyhref"] = `/jyapp/article/content/` + qu.CommonEncodeArticle("content", id) + `.html`
 			delete(pro, "_id")
 			//yucetime
 			pro["yucetime"] = time.Now().Unix()

+ 2 - 2
qyxy/src/config.json

@@ -1,11 +1,11 @@
 {
 	"mgodb": "192.168.3.207:27092",
 	"dbsize": 12,
-	"dbname": "mxs",
+	"dbname": "mixdata",
 	"dbcoll": "qyxy",
 	"savecoll": "qyxy_std",
 	"tasktime": 1,
-	"updatetime": 1597202468,
+	"updatetime": 0,
 	"elastic": {
         "addr": "http://192.168.3.11:9800",
         "index": "qyxy_v1",

+ 2 - 2
qyxy/src/main.go

@@ -70,8 +70,8 @@ func init() {
 }
 
 func main() {
-	go TimeTask()
-	//QyxyStandard()
+	//go TimeTask()
+	QyxyStandard()
 	ch := make(chan bool, 1)
 	<-ch
 }

+ 10 - 1
qyxy/src/task.go

@@ -34,6 +34,12 @@ var (
 		[]string{"report_year", "company_phone", "zip_code", "company_email", "employee_no", "operator_name"},
 		[]string{"total_assets", "total_equity", "total_sales", "total_profit", "main_business_income", "profit_amount", "total_tax", "total_liability"},
 	}
+
+	//区域code补全
+	CodeMap = map[int]string{
+		2: "0000",
+		4: "00",
+	}
 )
 
 // var AllFieldListMap = map[string]string{
@@ -389,7 +395,6 @@ func QyxyStandard() bool {
 					EsSaveFlag = false
 				}
 			}
-			//qu.Debug("esMap---", esMap)
 			// qu.Debug("mgoMap---", mgoMap)
 			// return
 			lock.Lock()
@@ -794,6 +799,10 @@ func InitAddress() {
 	address, _ := Mgo.Find("address", nil, nil, nil, false, -1, -1)
 	for _, tmp := range *address {
 		code := qu.ObjToString(tmp["code"])
+		// codeLen := len(code)
+		// if t_code := CodeMap[codeLen]; t_code != "" { //新的address表补齐code
+		// 	code = code + t_code
+		// }
 		remark := fmt.Sprint(tmp["Remarks"])
 		city := &City{}
 		tmpjson, err := json.Marshal(tmp)

+ 1 - 1
qyxy_change/qy_baidu/task.go

@@ -18,7 +18,7 @@ func TimeTask() {
 	c := cron.New()
 	cronstrBd := "0 0 */" + fmt.Sprint(BdTaskTime) + " * * ?" 		//每TaskTime小时执行一次
 	//cronstr := "0 0 " + fmt.Sprint(TaskTime) + " * * ?"			//每天TaskTime跑一次
-	cronstrPa := "0 0 15 ? * " + fmt.Sprint(PaTaskTime) 			//凭安增量数据每周跑一次
+	cronstrPa := "0 0 15 ? * " + fmt.Sprint(PaTaskTime) 			//凭安增量数据每周跑一次
 	_ = c.AddFunc(cronstrBd, func() { GetBdData() })
 	_ = c.AddFunc(cronstrPa, func() { GetPaData() })
 	c.Start()

+ 3 - 1
src/config.json

@@ -3,6 +3,8 @@
     "mgodb": "192.168.3.207:27092",
     "dbsize": 3,
     "dbname": "extract_v3xs",
+    "dbname_addrs": "extract_v3xs",
+    "dbname_addrs_c": "address_new_2020",
     "redis": "qyk_redis=192.168.3.207:6379",
     "elasticsearch": "http://127.0.0.1:9800",
     "elasticsearch_index": "winner_enterprise_tmp",
@@ -66,7 +68,7 @@
     },
     "istest": true,
     "isSaveTag": false,
-    "tomail": "zhangjinkun@topnet.net.cn,chenmingzhu@topnet.net.cn,zhaolongyue@topnet.net.cn",
+    "tomail": "fengweiqiang@topnet.net.cn",
     "api": "http://10.171.112.160:19281/_send/_mail",
     "deleteInstanceTimeHour": 1,
     "jsondata_extweight": 1

+ 46 - 19
src/jy/extract/extract.go

@@ -102,7 +102,7 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 			//if qu.ObjToString(v["sensitive"]) != ""||ggtest.MatchString(qu.ObjToString(v[""])) { //去除含敏感词数据
 			//	continue
 			//}
-			if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" { //临时
+			if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" || "a_hbszbtbggfwpt_kbjl" == qu.ObjToString(v["spidercode"]) { //临时开标记录
 				continue
 			}
 			var j, jf *ju.Job
@@ -326,15 +326,15 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		if (*toMap)["jsoncontent"] != nil {
 			delete(*toMap, "jsoncontent")
 		}
-		for k,v := range *toMap{
-			if _,ok := v.(float64);ok{
+		for k, v := range *toMap {
+			if _, ok := v.(float64); ok {
 				continue
-			}else if _,ok := v.(int64);ok{
+			} else if _, ok := v.(int64); ok {
 				continue
-			}else if _,ok2 := v.(string);ok2{
+			} else if _, ok2 := v.(string); ok2 {
 				continue
-			}else {
-				delete(*toMap,k)
+			} else {
+				delete(*toMap, k)
 			}
 		}
 	}
@@ -463,12 +463,11 @@ func file2text(doc *map[string]interface{}) {
 			tmpstr += bs + "\n"
 		}
 	}
-	(*doc)["detailfile"] = tmpstr
+	(*doc)["detailfile"] = strings.ReplaceAll(tmpstr, "附件", "")
 }
 
 //抽取
 func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
-
 	e.ExtractDetail(j, isSite, j.SpiderCode)
 	if jf != nil && jf.IsFile {
 		e.ExtractDetail(jf, isSite, j.SpiderCode)
@@ -649,10 +648,18 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 				if v.Field == "projectname" && v.Type == "table" {
 					break
 				}
+				if key == "budget" || key == "bidamount" {
+					if _, ok := v.Value.(float64); ok && !v.IsTrue {
+						continue
+					}
+				}
 				lockclear.Lock()
 				var cfn = []string{}
 				if isSite {
 					cfn = e.SiteClearFn[key]
+					if len(cfn) == 0 {
+						cfn = e.ClearFn[key]
+					}
 				} else {
 					cfn = e.ClearFn[key]
 				}
@@ -689,7 +696,7 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 		//		bs, _ := json.Marshal(j.Result)
 		//		 log.Debug("抽取结果", j.Title, j.SourceMid, string(bs))
 	}, func(err interface{}) {
-		log.Debug("ExtractProcess err", err)
+		log.Debug("ExtractProcess err", err, j.SourceMid)
 	})
 }
 func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool, codeSite string) {
@@ -756,7 +763,15 @@ func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool, codeSite string) {
 		for key, val := range j.Result {
 			for _, v := range val {
 				lockclear.Lock()
-				cfn := e.ClearFn[key]
+				var cfn = []string{}
+				if isSite {
+					cfn = e.SiteClearFn[key]
+					if len(cfn) == 0 {
+						cfn = e.ClearFn[key]
+					}
+				} else {
+					cfn = e.ClearFn[key]
+				}
 				lockclear.Unlock()
 				if len(cfn) == 0 {
 					continue
@@ -1195,7 +1210,7 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 //lua脚本根据属性设置提取kv值
 func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) (map[string][]map[string]interface{}, bool) {
 	kvmap := map[string][]map[string]interface{}{}
-	if len(j.Winnerorder) > 1 {
+	if len(j.Winnerorder) > 1 && qu.Float64All(j.Winnerorder[0]["sort"]) == 1 {
 		if vc.Field == "bidamount" {
 			for _, v := range j.Winnerorder {
 				if v["price"] == nil {
@@ -1760,16 +1775,17 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		for _, val := range result {
 			for _, v := range val { //取第一个非负数,项目名称除外
 				//存0是否有效
-				if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue{
+				if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue && v.Score > -1 {
 					tmp[v.Field] = v.Value
 					break
 				}
-				if v.Score > -1 {
+				if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget") && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
 					tmp[v.Field] = v.Value
 					break
 				}
 			}
 		}
+		tmp["winner"] = strings.ReplaceAll(qu.ObjToString(tmp["winner"]), ",,", ",")
 		if len(j.PackageInfo) > 15 {
 			for k, v := range j.PackageInfo {
 				j.PackageInfo = map[string]map[string]interface{}{}
@@ -1873,6 +1889,10 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 								tmp[v.Field] = v.Value
 								break
 							}
+							if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget") && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
+								tmp[v.Field] = v.Value
+								break
+							}
 						}
 						break
 					}
@@ -1903,7 +1923,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		}
 		//城市抽取
 		if e.IsExtractCity {
-			//e.ExtractCity(j, tmp, _id)
 			e.NewExtractCity(j, &tmp, _id)
 		}
 		//品牌抽取
@@ -1917,7 +1936,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			if len(j.BrandData) > 0 {
 				tmp["tablebrand"] = j.BrandData
 			}
-			// log.Debug("============", j.HasBrand, j.HasGoods, j.HasKey, j.HasTable, j.BrandData)
 		}
 		//prince和number抽取
 		if ju.IsPriceNumber {
@@ -2065,20 +2083,26 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 	//delete(tmp, "subtype")
 	if _, ok := tmp["bidamount"].(string); ok {
 		delete(tmp, "bidamount")
-	} else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && fb/100 > qu.Float64All(tmp["budget"]) {
+	} else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && (fb/5 > qu.Float64All(tmp["budget"]) || qu.Float64All(tmp["budget"])/1000 > fb) {
 		delete(tmp, "bidamount")
 	}
 	if _, ok := tmp["budget"].(string); ok {
 		delete(tmp, "budget")
 	}
+	if _, ok := tmp["unitprice"].(string); ok {
+		delete(tmp, "unitprice")
+	}
 	if _, ok := tmp["bidopentime"].(string); ok {
 		delete(tmp, "bidopentime")
 	}
 	if _, ok := tmp["signaturedate"].(string); ok {
 		delete(tmp, "signaturedate")
 	}
+	if _, ok := tmp["supervisorrate"].(string); ok {
+		delete(tmp, "supervisorrate")
+	}
 	for k, v := range tmp {
-		if v == "" {
+		if v == "" || len(strings.TrimSpace(fmt.Sprint(v))) == 0 || strings.Contains(fmt.Sprint(v), "**") {
 			delete(tmp, k)
 		}
 	}
@@ -2398,11 +2422,14 @@ func resetWinnerorder(j *ju.Job) {
 	bidamounts := []*ju.ExtField{}
 
 	if maxlen > 0 {
+		if qu.Float64All(j.Winnerorder[0]["sort"]) != 1 {
+			return
+		}
 		winners = append(winners, &ju.ExtField{Code: "winnerorder", Field: "winner", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["entname"], Score: 0.5})
 		if j.Winnerorder[0]["price"] != nil {
 			tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"], ""}, j.SpiderCode, j.IsClearnMoney)
 			if tmpPrice[len(tmpPrice)-1].(bool) {
-				bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 2.5})
+				bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 2.5, IsTrue: true})
 			}
 		}
 	}

+ 169 - 230
src/jy/extract/extractInit.go

@@ -89,10 +89,10 @@ type ExtractTask struct {
 	ResultChanel chan bool //抽取结果详情
 	sync.RWMutex
 	ResultArr [][]map[string]interface {
-	} //抽取结果详情
+	}                   //抽取结果详情
 	BidChanel chan bool //抽取结果
 	BidArr    [][]map[string]interface {
-	} //抽取结果
+	}            //抽取结果
 	BidTotal int //结果数量
 
 	RecogFieldMap map[string]map[string]interface {
@@ -100,7 +100,7 @@ type ExtractTask struct {
 	FidClassMap map[string][]map[string]interface {
 	} //分类
 	CidRuleMap map[string][]map[string]interface {
-	} //规则
+	}                    //规则
 	AuditFields []string //需要审核的字段名称
 
 	SiteCityMap          map[string]*SiteCity //站点对应的省市区
@@ -1000,30 +1000,14 @@ func InitProvince(version string) map[string]interface{} {
 	return fn
 }
 
-//加载城市简称
-func InitCitySim(version string) map[string]map[string]interface{} {
+//加载所有
+func InitProvincesx() []map[string]interface{} {
 	defer qu.Catch()
-	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"citysim","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1)
-	fn := map[string]map[string]interface{}{}
-	for _, v := range *list {
-		name := qu.ObjToString(v["s_name"])
-		tmp := v["content"].(map[string]interface{})
-		fn[name] = tmp
-	}
-	return fn
-}
-
-//加载城市全称
-func InitCityAll(version string) map[string]map[string]interface{} {
-	defer qu.Catch()
-	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"cityall","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1)
-	fn := map[string]map[string]interface{}{}
-	for _, v := range *list {
-		name := qu.ObjToString(v["s_name"])
-		tmp := v["content"].(map[string]interface{})
-		fn[name] = tmp
-	}
-	return fn
+	provinces := make([]map[string]interface{}, 0)
+	ju.AddrsSess.Find(map[string]interface{}{
+		"Remarks": nil,
+	}).All(&provinces)
+	return provinces
 }
 
 //加载站点库site城市信息
@@ -1065,106 +1049,183 @@ func (e *ExtractTask) InitCityInfo() {
 			e.ProvinceMap[p1] = k             //华中科技大学:湖北
 		}
 	}
+	alldata := InitProvincesx()
+
+	fnx := make([]map[string]interface{}, 0)
+	citys_maps := make(map[string][]map[string]interface{}, 0)
+	districts_maps := make(map[string]map[string][]map[string]interface{}, 0)
+	towns_maps := make(map[string]map[string]map[string][]map[string]interface{}, 0)
+	jwhs_maps := make(map[string]map[string]map[string]map[string][]map[string]interface{}, 0)
+	for _, v := range alldata {
+		codenum := len(v["code"].(string))
+		province := qu.ObjToString(v["province"])
+		city := qu.ObjToString(v["city"])
+		district := qu.ObjToString(v["district"])
+		town := qu.ObjToString(v["town"])
+		if codenum == 2 {
+			fnx = append(fnx, v)
+		} else if codenum == 4 {
+			citys_maps[province] = append(citys_maps[province], v)
+		} else if codenum == 6 {
+			if districts_maps[province] == nil {
+				districts_maps[province] = make(map[string][]map[string]interface{}, 0)
+			}
+			districts_maps[province][city] = append(districts_maps[province][city], v)
+		} else if codenum == 9 {
+			if towns_maps[province] == nil {
+				towns_maps[province] = make(map[string]map[string][]map[string]interface{}, 0)
+			}
+			if towns_maps[province][city] == nil {
+				towns_maps[province][city] = make(map[string][]map[string]interface{}, 0)
+			}
+			towns_maps[province][city][district] = append(towns_maps[province][city][district], v)
+		} else if codenum == 12 {
+			if jwhs_maps[province] == nil {
+				jwhs_maps[province] = make(map[string]map[string]map[string][]map[string]interface{}, 0)
+			}
+			if jwhs_maps[province][city] == nil {
+				jwhs_maps[province][city] = make(map[string]map[string][]map[string]interface{}, 0)
+			}
+			if jwhs_maps[province][city][district] == nil {
+				jwhs_maps[province][city][district] = make(map[string][]map[string]interface{}, 0)
+			}
+			jwhs_maps[province][city][district][town] = append(jwhs_maps[province][city][district][town], v)
+		}
+	}
+
 	//初始化城市全称
-	fn2 := InitCityAll(e.TaskInfo.Version)
-	for k, v := range fn2 {
+	for _, provinces := range fnx {
+		all_province := qu.ObjToString(provinces["all_province"]) //省全称
+		jc_province := qu.ObjToString(provinces["province"])      //省简称
 		//加载省信息
-		e.Trie_Full_Province.AddWords(k) //加入省全称Trie(k:浙江省)
+		e.Trie_Full_Province.AddWords(all_province) //加入省全称Trie(k:浙江省)
 		p := &Province{}
-		p.Name = k                            //省全称:浙江省
-		p.Brief = v["brief"].(string)         //省简称:浙江
-		e.Trie_Sim_Province.AddWords(p.Brief) //加入省简称Trie(k:浙江)
-		e.ProvinceMap[k] = p.Brief            //浙江省:浙江
-		e.ProvinceBriefMap[p.Brief] = p       //浙江:省信息{}
-		p.Cap = v["captial"].(string)         //省会(杭州)
+		p.Name = all_province                     //省全称:浙江省
+		p.Brief = jc_province                     //省简称:浙江
+		e.Trie_Sim_Province.AddWords(jc_province) //加入省简称Trie(k:浙江)
+		e.ProvinceMap[all_province] = jc_province //浙江省:浙江
+		e.ProvinceBriefMap[jc_province] = p       //浙江:省信息{}
+		if province_alias, ok := provinces["province_alias"].([]interface{}); ok {
+			for _, vprovince_alias := range province_alias {
+				e.ProvinceBriefMap[qu.ObjToString(vprovince_alias)] = p
+			}
+		}
 		//加载市信息
-		city, _ := v["city"].(map[string]interface{})
-		for k1, v1 := range city {
-			e.Trie_Full_City.AddWords(k1) //加入市全称Trie(k:杭州市)
-			v1m, _ := v1.(map[string]interface{})
+		citys := citys_maps[jc_province]
+		isok := make(map[string]bool)
+		for _, vcity := range citys {
+			qc_city := qu.ObjToString(vcity["city"])
+			jc_city := qu.ObjToString(vcity["brief_city"])
+			e.Trie_Full_City.AddWords(qc_city) //加入市全称Trie(k:杭州市)
 			c := &City{}
-			c.Name = k1                       //市全称:杭州市
-			c.Brief = v1m["brief"].(string)   //市简称:杭州
-			e.Trie_Sim_City.AddWords(c.Brief) //加入市简称Trie(k:杭州)
-			e.CityMap[k1] = c.Brief           //杭州市:杭州
-			e.CityBriefMap[c.Brief] = c       //杭州:市信息{}
-			e.CityFullMap[k1] = c             //杭州市:市信息{}
+			c.Name = qc_city //市全称:杭州市
+			if jc_city != "" {
+				c.Brief = jc_city                 //市简称:杭州
+				e.Trie_Sim_City.AddWords(c.Brief) //加入市简称Trie(k:杭州)
+				e.CityMap[qc_city] = c.Brief      //杭州市:杭州
+				e.CityBriefMap[c.Brief] = c       //杭州:市信息{}
+				e.CityFullMap[qc_city] = c        //杭州市:市信息{}
+			}
 			c.P = p
-			if c.Name == p.Cap {
-				p.Captial = c //加载province中的省会市信息{}
+			if city_alias, ok := vcity["city_alias"].([]interface{}); ok {
+				for _, vcity_alias := range city_alias {
+					strvcity_alias := qu.ObjToString(vcity_alias)
+					if isok[jc_province+"_"+strvcity_alias] {
+						continue
+					}
+					e.CityBriefMap[strvcity_alias] = c
+					e.initDistricts(jc_province, strvcity_alias, c, jc_city, districts_maps, towns_maps, jwhs_maps)
+					isok[jc_province+"_"+strvcity_alias] = true
+				}
 			}
-			//区县
-			districtmap, _ := v1m["area"].(map[string]interface{}) //区或县
-			for district, streets := range districtmap {
-				d := &District{}
-				d.Name = district
-				d.C = c
-				//省直辖市,河南济源市没有区一级,目前区一级写的还是济源市
-				//匹配时,如果匹配到区,拿区和市比对,相同则代表是省直辖市,不要区一级
-				e.Trie_Full_District.AddWords(district) //加入区或县全称Trie
-				ctmp := e.NewDistrictCityMap[district]
+			if isok[jc_province+"_"+qc_city] {
+				continue
+			}
+			e.initDistricts(jc_province, qc_city, c, jc_city, districts_maps, towns_maps, jwhs_maps)
+		}
+	}
+
+	e.Trie_Fulls = []*ju.Trie{e.Trie_Full_Province, e.Trie_Full_City, e.Trie_Full_District, e.Trie_Full_Street, e.Trie_Full_Community}
+	e.Trie_Sims = []*ju.Trie{e.Trie_Sim_Province, e.Trie_Sim_City, e.Trie_Sim_District}
+}
+
+//加载区县
+func (e *ExtractTask) initDistricts(jc_province string, qc_city string, c *City,
+	jc_city string, districts_maps map[string]map[string][]map[string]interface{},
+	towns_maps map[string]map[string]map[string][]map[string]interface{},
+	jwhs_maps map[string]map[string]map[string]map[string][]map[string]interface{}) {
+	districts := districts_maps[jc_province][qc_city]
+	for _, vdistricts := range districts {
+		qc_district := qu.ObjToString(vdistricts["district"])
+		jc_district := qu.ObjToString(vdistricts["brief_district"])
+		d := &District{}
+		d.Name = qc_district
+		d.C = c
+		e.Trie_Full_District.AddWords(qc_district) //加入区或县全称Trie
+		if jc_district != "" {
+			e.Trie_Sim_District.AddWords(jc_district) //加入区或县简称Trie
+			//初始化城市简称
+			c := e.CityBriefMap[jc_city]
+			dfullarr := e.NewDistrictSimAndAll[jc_district]
+			dfullcity := map[string]*City{qc_district: c}
+			if len(dfullarr) == 0 {
+				tmparr := []map[string]*City{dfullcity}
+				e.NewDistrictSimAndAll[jc_district] = tmparr
+			} else {
+				e.NewDistrictSimAndAll[jc_district] = append(e.NewDistrictSimAndAll[jc_district], dfullcity)
+			}
+		}
+		ctmp := e.NewDistrictCityMap[qc_district]
+		if len(ctmp) == 0 {
+			tmpcarr := []*City{c}
+			e.NewDistrictCityMap[qc_district] = tmpcarr
+		} else {
+			e.NewDistrictCityMap[qc_district] = append(e.NewDistrictCityMap[qc_district], c)
+		}
+		if district_alias, ok := vdistricts["district_alias"].([]interface{}); ok {
+			for _, vdistrict_alias := range district_alias {
+				strvdistrict_alias := qu.ObjToString(vdistrict_alias)
+				e.Trie_Full_District.AddWords(strvdistrict_alias) //加入区或县全称Trie
+				ctmp := e.NewDistrictCityMap[strvdistrict_alias]
 				if len(ctmp) == 0 {
 					tmpcarr := []*City{c}
-					e.NewDistrictCityMap[district] = tmpcarr
+					e.NewDistrictCityMap[strvdistrict_alias] = tmpcarr
 				} else {
-					e.NewDistrictCityMap[district] = append(e.NewDistrictCityMap[district], c)
-				}
-				//街道
-				streetmap, _ := streets.(map[string]interface{})
-				for street, communitys := range streetmap {
-					s := &Street{}
-					s.Name = street
-					s.D = d
-					e.Trie_Full_Street.AddWords(street) //加入街道全称Trie
-					dtmp := e.NewStreetDistrictMap[street]
-					if len(dtmp) == 0 {
-						tmpdarr := []*District{d}
-						e.NewStreetDistrictMap[street] = tmpdarr
-					} else {
-						e.NewStreetDistrictMap[street] = append(e.NewStreetDistrictMap[street], d)
-					}
-					//村、居委会
-					for _, ct := range qu.ObjArrToStringArr(communitys.([]interface{})) {
-						e.Trie_Full_Community.AddWords(ct) //加入居委会、村全称Trie
-						cttmp := e.CommunityDistrictMap[ct]
-						if len(cttmp) == 0 {
-							tmpdarr := []*District{d}
-							e.CommunityDistrictMap[ct] = tmpdarr
-						} else {
-							e.CommunityDistrictMap[ct] = append(e.CommunityDistrictMap[ct], d)
-						}
-					}
+					e.NewDistrictCityMap[strvdistrict_alias] = append(e.NewDistrictCityMap[strvdistrict_alias], c)
 				}
 			}
 		}
-	}
-
-	//初始化城市简称
-	fn3 := InitCitySim(e.TaskInfo.Version)
-	for _, v := range fn3 {
-		city, _ := v["city"].(map[string]interface{})
-		for _, v1 := range city {
-			v1m, _ := v1.(map[string]interface{})
-			cb := v1m["brief"].(string)                 //市简称
-			arr := v1m["area"].(map[string]interface{}) //区或县简称
-			for districtsim, districtall := range arr {
-				dfullstr, _ := districtall.(string)
-				e.Trie_Sim_District.AddWords(districtsim) //加入区或县简称Trie
-				c := e.CityBriefMap[cb]
-				dfullarr := e.NewDistrictSimAndAll[districtsim]
-				dfullcity := map[string]*City{dfullstr: c}
-				if len(dfullarr) == 0 {
-					tmparr := []map[string]*City{dfullcity}
-					e.NewDistrictSimAndAll[districtsim] = tmparr
+		//街道
+		towns := towns_maps[jc_province][qc_city][qc_district]
+		for _, vtown := range towns {
+			strvtown := qu.ObjToString(vtown["town"])
+			s := &Street{}
+			s.Name = strvtown
+			s.D = d
+			e.Trie_Full_Street.AddWords(strvtown) //加入街道全称Trie
+			dtmp := e.NewStreetDistrictMap[strvtown]
+			if len(dtmp) == 0 {
+				tmpdarr := []*District{d}
+				e.NewStreetDistrictMap[strvtown] = tmpdarr
+			} else {
+				e.NewStreetDistrictMap[strvtown] = append(e.NewStreetDistrictMap[strvtown], d)
+			}
+			//村、居委会
+			jwhs := jwhs_maps[jc_province][qc_city][qc_district][strvtown]
+			for _, vjwh := range jwhs {
+				strvillage := qu.ObjToString(vjwh["village"])
+				e.Trie_Full_Community.AddWords(strvillage) //加入居委会、村全称Trie
+				cttmp := e.CommunityDistrictMap[strvillage]
+				if len(cttmp) == 0 {
+					tmpdarr := []*District{d}
+					e.CommunityDistrictMap[strvillage] = tmpdarr
 				} else {
-					e.NewDistrictSimAndAll[districtsim] = append(e.NewDistrictSimAndAll[districtsim], dfullcity)
+					e.CommunityDistrictMap[strvillage] = append(e.CommunityDistrictMap[strvillage], d)
 				}
 			}
 		}
-	}
 
-	e.Trie_Fulls = []*ju.Trie{e.Trie_Full_Province, e.Trie_Full_City, e.Trie_Full_District, e.Trie_Full_Street, e.Trie_Full_Community}
-	e.Trie_Sims = []*ju.Trie{e.Trie_Sim_Province, e.Trie_Sim_City, e.Trie_Sim_District}
+	}
 }
 
 func (e *ExtractTask) InitVar() {
@@ -1226,128 +1287,6 @@ func (e *ExtractTask) InitVar() {
 
 }
 
-//初始化城市省份敏感词
-func (e *ExtractTask) InitCityDFA() {
-	defer qu.Catch()
-	e.CityAllGet = &ju.DFA{}
-	e.CitySimGet = &ju.DFA{}
-	e.DistrictAllGet = &ju.DFA{}
-	e.DistrictSimGet = &ju.DFA{}
-	e.ProvinceAllGet = &ju.DFA{}
-	e.ProvinceSimGet = &ju.DFA{}
-	e.StreetGet = &ju.DFA{}
-	//初始化map
-	if e.ProvinceMap == nil {
-		e.ProvinceMap = make(map[string]string)
-	}
-	if e.CityMap == nil {
-		e.CityMap = make(map[string]string)
-	}
-	if e.DistrictSimAndAll == nil {
-		e.DistrictSimAndAll = make(map[string]string)
-	}
-	if e.CityBriefMap == nil {
-		e.CityBriefMap = make(map[string]*City)
-	}
-	if e.CityFullMap == nil {
-		e.CityFullMap = make(map[string]*City)
-	}
-	if e.ProvinceBriefMap == nil {
-		e.ProvinceBriefMap = make(map[string]*Province)
-	}
-	if e.DistrictCityMap == nil {
-		e.DistrictCityMap = make(map[string]*City)
-	}
-	if e.StreetDistrictMap == nil {
-		e.StreetDistrictMap = make(map[string]*District)
-	}
-	//初始化省
-	fn1 := InitProvince(e.TaskInfo.Version)
-	for k, v := range fn1 {
-		for _, p := range v.([]interface{}) {
-			p1, _ := p.(string)
-			e.ProvinceAllGet.AddWord(p1) //华中科技大学
-			e.ProvinceMap[p1] = k        //华中科技大学:湖北
-		}
-	}
-
-	//初始化城市全称
-	fn2 := InitCityAll(e.TaskInfo.Version)
-	for k, v := range fn2 {
-		//加载省信息
-		e.ProvinceAllGet.AddWord(k) //加入省全称dfa(k:浙江省)
-		p := &Province{}
-		p.Name = k                        //省全称:浙江省
-		p.Brief = v["brief"].(string)     //省简称:浙江
-		e.ProvinceSimGet.AddWord(p.Brief) //加入省简称dfa(k:浙江)
-		e.ProvinceMap[k] = p.Brief        //浙江省:浙江
-		e.ProvinceBriefMap[p.Brief] = p   //浙江:省信息{}
-		p.Cap = v["captial"].(string)     //省会(杭州)
-
-		//加载市信息
-		city, _ := v["city"].(map[string]interface{})
-		for k1, v1 := range city {
-			e.CityAllGet.AddWord(k1) //加入市全称dfa(k:杭州市)
-			v1m, _ := v1.(map[string]interface{})
-			c := &City{}
-			c.Name = k1                     //市全称:杭州市
-			c.Brief = v1m["brief"].(string) //市简称:杭州
-			e.CitySimGet.AddWord(c.Brief)   //加入市简称dfa(k:杭州)
-			e.CityMap[k1] = c.Brief         //杭州市:杭州
-			e.CityBriefMap[c.Brief] = c     //杭州:市信息{}
-			e.CityFullMap[k1] = c           //杭州市:市信息{}
-			c.P = p
-			if c.Name == p.Cap {
-				p.Captial = c //加载province中的省会市信息{}
-			}
-
-			//区县
-			districtmap := v1m["area"].(map[string]interface{}) //区或县
-			for district, streetarr := range districtmap {
-				d := &District{}
-				d.Name = district
-				d.C = c
-				//省直辖市,河南济源市没有区一级,目前区一级写的还是济源市
-				//匹配时,如果匹配到区,拿区和市比对,相同则代表是省直辖市,不要区一级?
-				e.DistrictAllGet.AddWord(district) //加入区或县全称dfa
-				ctmp := e.DistrictCityMap[district]
-				if ctmp == nil {
-					e.DistrictCityMap[district] = c
-				}
-				//街道
-				for _, s := range qu.ObjArrToStringArr(streetarr.([]interface{})) {
-					e.StreetGet.AddWord(s) //加入街道敏感词
-					dtmp := e.StreetDistrictMap[s]
-					if dtmp == nil {
-						e.StreetDistrictMap[s] = d
-					}
-				}
-			}
-		}
-	}
-	//初始化城市简称
-	fn3 := InitCitySim(e.TaskInfo.Version)
-	for _, v := range fn3 {
-		city, _ := v["city"].(map[string]interface{})
-		for _, v1 := range city {
-			v1m, _ := v1.(map[string]interface{})
-			cb := v1m["brief"].(string)                 //市简称
-			arr := v1m["area"].(map[string]interface{}) //区或县简称
-			for districtsim, districtall := range arr {
-				e.DistrictSimAndAll[districtsim] = districtall.(string)
-				d := &District{}
-				d.Name = districtsim
-				d.C = e.CityBriefMap[cb]
-				e.DistrictSimGet.AddWord(districtsim) //加入区或县简称敏感词
-				ctmp := e.DistrictCityMap[districtsim]
-				if ctmp == nil {
-					e.DistrictCityMap[districtsim] = e.CityBriefMap[cb]
-				}
-			}
-		}
-	}
-}
-
 //初始化邮编库
 func (e *ExtractTask) InitPostCode() {
 	defer qu.Catch()

+ 0 - 410
src/jy/extract/extractcity2.go

@@ -1,410 +0,0 @@
-package extract
-
-import (
-	db "jy/mongodbutil"
-	ju "jy/util"
-	"log"
-	qu "qfw/util"
-	"strings"
-)
-
-var ProvinceMap2 map[string]string
-var CityBrief2 map[string]*City         //只加载一次即可
-var ProvinceBrief2 map[string]*Province //只加载一次
-var AreaToCity2 map[string][]*City      //两个文件共用
-var DistrictCityMap2 map[string]*City
-var StreetDistrictMap2 map[string]*District
-var AreaGet2 *ju.DFA         //市全称
-var AreaDistrict2 *ju.DFA    //区或县
-var AreaProvinceGet2 *ju.DFA //省
-var AreaSimGet2 *ju.DFA      //市简称
-var AreaStreet2 *ju.DFA      //街道
-
-func InitDFA2() {
-	defer qu.Catch()
-	AreaGet2 = &ju.DFA{}
-	AreaDistrict2 = &ju.DFA{}
-	AreaProvinceGet2 = &ju.DFA{}
-	AreaStreet2 = &ju.DFA{}
-	//初始化map
-	if ProvinceMap2 == nil {
-		ProvinceMap2 = make(map[string]string)
-	}
-	if CityBrief2 == nil {
-		CityBrief2 = make(map[string]*City)
-	}
-	if ProvinceBrief2 == nil {
-		ProvinceBrief2 = make(map[string]*Province)
-	}
-	if AreaToCity2 == nil {
-		AreaToCity2 = make(map[string][]*City)
-	}
-	if DistrictCityMap2 == nil {
-		DistrictCityMap2 = make(map[string]*City)
-	}
-	if StreetDistrictMap2 == nil {
-		StreetDistrictMap2 = make(map[string]*District)
-	}
-	//初始化省
-	fn1 := InitProvince("v3.0")
-	for k, v := range fn1 {
-		for _, p := range v.([]interface{}) {
-			p1, _ := p.(string)
-			AreaProvinceGet2.AddWord(p1)
-			ProvinceMap2[p1] = k
-		}
-	}
-
-	//初始化城市全称
-	fn2 := InitCityAll("v3.0")
-	for k, v := range fn2 {
-		AreaProvinceGet2.AddWord(k) //省全称
-		p := &Province{}
-		p.Name = k
-		p.Brief = v["brief"].(string)
-		ProvinceMap2[k] = p.Brief
-		//
-		ProvinceBrief2[p.Brief] = p
-		p.Cap = v["captial"].(string)
-		city, _ := v["city"].(map[string]interface{})
-		for k1, v1 := range city {
-			v1m, _ := v1.(map[string]interface{})
-			c := &City{}
-			c.Name = k1
-			//			if v1m["brief"] == nil {
-			//			}
-			c.Brief = v1m["brief"].(string)
-			//
-			CityBrief2[c.Brief] = c
-			c.P = p
-			if c.Brief == p.Cap {
-				p.Captial = c
-			}
-			//加入到城市map中
-			cs := AreaToCity2[k1]
-			AreaGet2.AddWord(k1) //市全称
-			if cs != nil {
-				cs = append(cs, c)
-			} else {
-				cs = []*City{c}
-			}
-			AreaToCity2[k1] = cs
-			//区县
-			districtmap := v1m["area"].(map[string]interface{}) //区或县
-			for district, streetarr := range districtmap {
-				d := &District{}
-				d.Name = district
-				d.C = c
-				AreaDistrict2.AddWord(district) //加入区或县敏感词
-				ctmp := DistrictCityMap2[district]
-				if ctmp == nil {
-					DistrictCityMap2[district] = c
-				}
-				//街道
-				for _, s := range qu.ObjArrToStringArr(streetarr.([]interface{})) {
-					AreaStreet2.AddWord(s) //加入街道敏感词
-					dtmp := StreetDistrictMap2[s]
-					if dtmp == nil {
-						StreetDistrictMap2[s] = d
-					}
-				}
-			}
-		}
-	}
-	//初始化城市简称
-	fn3 := InitCitySim("v3.0")
-	AreaSimGet2 = &ju.DFA{}
-	for k, v := range fn3 {
-		pb := v["brief"].(string)
-		p := ProvinceBrief2[pb]
-		//加载
-		for _, ss := range []string{k, pb} {
-			cs := AreaToCity2[ss]
-			if cs != nil {
-				cs = append(cs, p.Captial)
-			} else {
-				cs = []*City{p.Captial}
-			}
-			AreaToCity2[ss] = cs
-			AreaSimGet2.AddWord(ss) //省全称和省简称
-		}
-		city, _ := v["city"].(map[string]interface{})
-		for k1, v1 := range city {
-			v1m, _ := v1.(map[string]interface{})
-			if v1m["brief"] == nil {
-			}
-			cb := v1m["brief"].(string)
-			c := AreaToCity2[k1][0]
-			//加入到城市map中
-			for _, ss := range []string{cb, k + cb, pb + cb} { //杭州 浙江省杭州  浙江杭州
-				AreaSimGet2.AddWord(ss)
-				cs := AreaToCity2[ss]
-				if cs != nil {
-					cs = append(cs, c)
-				} else {
-					cs = []*City{c}
-				}
-				AreaToCity2[ss] = cs
-			}
-			arr := v1m["area"].([]interface{})
-			for _, k2 := range arr {
-				s := k2.(string)
-				for n, ss := range []string{s, cb + s, pb + s, k + s} { //淳安 杭州淳安 浙江淳安 浙江省淳安
-					cs := AreaToCity2[ss]
-					AreaSimGet2.AddWord(ss)
-					if cs != nil {
-						cs = append(cs, c)
-					} else {
-						cs = []*City{c}
-					}
-					AreaToCity2[ss] = cs
-
-					//只加入简称
-					if n == 0 {
-						d := &District{}
-						d.Name = ss
-						d.C = c
-						AreaDistrict2.AddWord(ss) //加入区或县简称敏感词
-						ctmp := DistrictCityMap2[ss]
-						if ctmp == nil {
-							DistrictCityMap2[ss] = c
-						}
-					}
-				}
-			}
-		}
-	}
-}
-
-func FindBuyer() {
-	list, _ := db.Mgo.Find("buyer", nil, nil, `{"name":1}`, false, -1, -1)
-	for _, l := range *list {
-		val := qu.ObjToString(l["name"])
-		if val != "" {
-			//开始抽取城市省份
-			bres, c, p := ExtractProvinceCity2("", "", qu.BsonIdToSId(l["_id"]), []string{val})
-			bres, p, c, d := ExtractDistrict2([]string{val}, bres, c, p, qu.BsonIdToSId(l["_id"])) //抽取区或县
-			log.Println("---------------------", bres, c, p, d)
-		}
-	}
-}
-
-//抽取城市、省份
-func ExtractProvinceCity2(province, city, id string, text []string) (bres bool, c, p string) {
-	defer qu.Catch()
-	bc := true //是否继续抽取
-	if city != "" {
-		lock.Lock()
-		citybrief := CityBrief2[city]
-		//log.Println("citybrief========", citybrief)
-		lock.Unlock()
-		if citybrief == nil { //简称不存在
-			log.Println("city err:", city, id)
-		} else { //简称存在
-			lock.Lock()
-			pbrief := CityBrief2[city].P.Brief
-			//log.Println("pbrief========", pbrief)
-			lock.Unlock()
-			if province != pbrief { //省份不配对
-				log.Println("province err:", city, province, id)
-			} else {
-				bc = false
-				//城市省份都正确
-			}
-		}
-	}
-	//有省份
-	bp := false
-	lock.Lock()
-	provincebrief := ProvinceBrief2[province]
-	//log.Println("provincebrief========", provincebrief)
-	lock.Unlock()
-	if provincebrief != nil { //省份简称正确
-		bp = true
-	} else { //没有省份,先识别省份
-		for _, str := range text { //没有省的简称,从配置的字段信息中抽取省
-			word := AreaProvinceGet2.CheckSensitiveWord(str) //省全称DFA中匹配
-			if word != "" {
-				lock.Lock()
-				province = ProvinceMap2[word]
-				lock.Unlock()
-				bp = true
-				break
-			}
-		}
-	}
-	//匹配城市
-	if bc { //城市简称不存在CityBrief[city]==nil,或城市简称存在但省份不配对,继续抽取
-		for pos, GET := range []*ju.DFA{AreaGet2, AreaSimGet2} { //AreaGet市全称,AreaSimGet省全称和简称
-			ws := make([]string, 5)
-			for n, str := range text {
-				if str != "" {
-					word := GET.CheckSensitiveWord(str)
-					if pos == 1 { //用简称 后辍为路、集团替换
-						str1 := strings.Replace(str, word+"路", "", 1)
-						if str1 != str {
-							word = GET.CheckSensitiveWord(str1)
-						}
-					}
-					ws[n] = word
-					if word != "" {
-						lock.Lock()
-						res := AreaToCity2[word]
-						lock.Unlock()
-						if len(res) == 1 {
-							//判断省份
-							if !bp || province == res[0].P.Brief { //省份不存在或一致直接返回(!bp:省的简称)
-								bres = true
-								c = res[0].Brief
-								p = res[0].P.Brief
-								break
-							} else { //不一致时。。暂时不处理
-							}
-						} else { //多个时(出现这种情况是多个省中的市,市名相同。现在的配置文件中已经将市名,县名重复的全部去掉)
-						}
-					}
-				}
-			}
-			if !bres { //没有匹配到
-				mc := map[string]int{}
-				for _, w := range ws {
-					lock.Lock()
-					res := AreaToCity2[w]
-					lock.Unlock()
-					for _, ct := range res {
-						log.Println("ct===", ct)
-						if ct == nil {
-							continue
-						}
-						if bp { //有省份
-							if ct.P != nil && ct.P.Brief == province {
-								mc[ct.Brief]++
-							}
-						} else { //没有省份
-							mc[ct.Brief]++
-						}
-					}
-				}
-				//计算mc中最大值且大于1
-				max := 1
-				v := ""
-				for mk, mv := range mc {
-					if mv > max {
-						v = mk
-					}
-				}
-				if v != "" {
-					bres = true
-					lock.Lock()
-					ctb := CityBrief2[v]
-					lock.Unlock()
-					c = ctb.Brief
-					p = ctb.P.Brief
-				} else if len(mc) > 0 {
-					//取级别更大的
-					v := ""
-					for mk, _ := range mc {
-						lock.Lock()
-						cb := CityBrief2[mk]
-						lock.Unlock()
-						if cb.P.Cap == mk {
-							bres = true
-							c = cb.Brief
-							p = cb.P.Brief
-							break
-						} else {
-							v = mk
-						}
-					}
-					if !bres {
-						bres = true
-						lock.Lock()
-						cbb := CityBrief2[v]
-						c = cbb.Brief
-						p = cbb.P.Brief
-						lock.Unlock()
-					}
-				}
-			}
-			if bres {
-				break
-			}
-		}
-	} else {
-		return
-	}
-	if !bres {
-		//取默认省会
-		lock.Lock()
-		pbp := ProvinceBrief2[province]
-		lock.Unlock()
-		if pbp != nil {
-			bres = true
-			c = pbp.Cap
-			p = province
-		}
-	}
-	return
-}
-func ExtractDistrict2(field []string, bres bool, c, p, id string) (bool, string, string, string) {
-	d := ""
-	for _, str := range field {
-		//log.Println("field===========", str)
-		for pos, GET := range []*ju.DFA{AreaDistrict2, AreaStreet2} { //先匹配区或县再匹配街道
-			word := GET.CheckSensitiveWord(str)
-			//log.Println("word================", word)
-			if word != "" {
-				if pos == 0 { //区或县匹配
-					//log.Println("县直接匹配到====", word)
-					lock.Lock()
-					city := DistrictCityMap2[word]
-					lock.Unlock()
-					//log.Println("city================", city)
-					if city != nil {
-						d = word
-						ctmp := city.Brief
-						ptmp := city.P.Brief
-						//log.Println("ctmpptmp================", ptmp, ctmp)
-						if !bres { //城市省份没有抽到,通过区或县定位市和省
-							c = ctmp
-							p = ptmp
-							bres = true
-						} else { //对比抽到的城市省份是否一致
-							if c != ctmp || p != ptmp {
-								//log.Println("str---", str, "====", word)
-								//log.Println("district: City And Province, Inconsistent Before And After,Id:", id, c, p, ctmp, ptmp, d)
-								c = ctmp
-								p = ptmp
-							}
-						}
-					}
-				} else { //街道匹配
-					//log.Println("匹配到街道====", word)
-					lock.Lock()
-					district := StreetDistrictMap2[word]
-					lock.Unlock()
-					//log.Println("district================", district)
-					if district != nil {
-						d = district.Name
-						ctmp := district.C.Brief
-						ptmp := district.C.P.Brief
-						//log.Println("districtptmp================", ctmp, ptmp)
-						if !bres { //城市省份没有抽到,通过区或县定位市和省
-							c = ctmp
-							p = ptmp
-							bres = true
-						} else { //对比抽到的城市省份是否一致
-							if c != ctmp || p != ptmp {
-								//log.Println("street: City And Province, Inconsistent Before And After,Id:", id, c, p, ctmp, ptmp, d)
-								c = ctmp
-								p = ptmp
-							}
-						}
-					}
-				}
-				return bres, p, c, d
-			}
-		}
-	}
-	return bres, p, c, d
-}

+ 1 - 1
src/jy/extract/extractudp.go

@@ -230,7 +230,7 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				//	log.Debug(index, qu.BsonIdToSId(v["_id"]), "//去除含敏感词数据")
 				//	continue
 				//}
-				if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" { //临时开标记录
+				if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl"  || "a_hbszbtbggfwpt_kbjl" == qu.ObjToString(v["spidercode"]) { //临时开标记录
 					log.Debug(index, qu.BsonIdToSId(v["_id"]), "//开标记录")
 					continue
 				}

+ 3 - 2
src/jy/pretreated/analykv.go

@@ -2,7 +2,8 @@ package pretreated
 
 import (
 	u "jy/util"
-	//qu "qfw/util"
+	"qfw/util"
+
 	"regexp"
 	"strings"
 )
@@ -431,7 +432,7 @@ func keydetail(k, v string, m *SortMap, tag string, pos int, strs [][]string, ma
 			if vvv, ok := m.Map[k].([]string); ok {
 				vals = append(vals, vvv...)
 			} else {
-				vals = append(vals, v)
+				vals = append(vals,  util.ObjToString(m.Map[k]))
 			}
 			vals = append(vals, v)
 			m.AddKey(k, vals)

+ 7 - 4
src/jy/pretreated/analystep.go

@@ -14,17 +14,20 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-var yjReg *regexp.Regexp = regexp.MustCompile("单位业绩|个人业绩|投标人业绩|主要人员相关资料|投标文件格式|唱标记录|项目业绩|否决投标的?情况说明")
+var yjReg *regexp.Regexp = regexp.MustCompile("(打分表|负责人|单位|个人|投标人|项目|企业)业绩|主要人员相关资料|投标文件格式|唱标记录|否决投标的?情况说明")
+var hisReg = regexp.MustCompile("(开标记录|类似业绩|历史业绩|填报项目业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</td>)")
 var hisReg2 = regexp.MustCompile("(开标记录|业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</tr>|</table>|</td>)")
 var formattext = regexp.MustCompile("(投标总价)([0-9,.万元]*)")
 var formattext2 = regexp.MustCompile("中标单价.*(中标总价.*)")
+var formattext3 = regexp.MustCompile("(同类项目业绩、)")
 
 func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 	con := job.Content
 	//全文的需要修复表格
 	con = RepairCon(con)
 	//格式化正文
-	con = hisReg.ReplaceAllString(con, "")
+	con = formattext3.ReplaceAllString(con,"")
+	con = hisReg.ReplaceAllString(con, "${2}")
 	con = hisReg2.ReplaceAllString(con, "${2}")
 	con = formattext.ReplaceAllString(con, "${1}:${2}")
 	con = formattext2.ReplaceAllString(con, "${1}")
@@ -117,10 +120,10 @@ func processTableInBlock(bl *util.Block, job *util.Job, isSite bool, codeSite st
 	//块中再查找表格(块,处理完把值赋到块)
 	bl.Text = formatText(bl.Text, "biangeng")
 	tabs, _ := ComputeConRatio(bl.Text, 2)
-	for _, tab := range tabs {
+	for i, tab := range tabs {
 		job.HasTable = 1
 		tmptag := ""
-		if bl.Title != "" && len(bl.Title) < 20 {
+		if i == 0 && bl.Title != "" && len(bl.Title) < 20 {
 			tmptag = bl.Title
 		} else if tab.Nodes[0] != nil && tab.Nodes[0].PrevSibling != nil {
 			tmptag = strings.TrimSpace(tab.Nodes[0].PrevSibling.Data)

+ 0 - 1
src/jy/pretreated/tablev2.go

@@ -824,7 +824,6 @@ func CheckHeader(txt string) (res, must bool, stype, reg, repl string) {
 con 文本
 strtype 1全文 2块文本
 **/
-var hisReg = regexp.MustCompile("类似业绩|历史业绩|开标记录|填报项目业绩")
 
 func ComputeConRatio(con string, strtype int) (tabs []*goquery.Selection, ratio float32) {
 	defer qutil.Catch()

+ 3 - 1
src/jy/util/util.go

@@ -2,6 +2,7 @@ package util
 
 import (
 	"fmt"
+	"gopkg.in/mgo.v2"
 	. "jy/mongodbutil"
 	qu "qfw/util"
 	"regexp"
@@ -38,6 +39,7 @@ var BrandGet *DFA     //品牌
 var IsBrandGoods bool //是否开启品牌抽取
 
 var SaveResult, FieldsFind, IsSaveTag, SaveBlock, QualityAudit, Ffield bool
+var AddrsSess *mgo.Collection
 
 func init() {
 	syncint = make(chan bool, 1)
@@ -48,7 +50,7 @@ func UtilInit() {
 	addr := qu.ObjToString(Config["mgodb"])
 	dbname := qu.ObjToString(Config["dbname"])
 	Mgo = MgoFactory(initCap, initCap*3, 120, addr, dbname)
-
+	AddrsSess = Mgo.Get().DB(qu.ObjToString(Config["dbname_addrs"])).C(qu.ObjToString(Config["dbname_addrs_c"]))
 	SaveResult, _ = Config["saveresult"].(bool)
 	FieldsFind, _ = Config["fieldsfind"].(bool)
 	IsSaveTag, _ = Config["iscltlog"].(bool)

+ 0 - 264
src/main_blocktest.go

@@ -1,264 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"jy/extract"
-	"jy/mongodbutil"
-	"jy/pretreated"
-	ju "jy/util"
-	"log"
-	"os"
-	qu "qfw/util"
-	"regexp"
-	"time"
-)
-
-var f *os.File
-var m = map[string]bool{}
-
-func main1() {
-	//winnerorder()
-	//return
-	//log.Println(pretreated.ProcTitle("以上公告内容如有变动将在相关网络媒体上另行通知凡购买本招标文件的单位必须就此采购项目的相关事宜详细咨询否则参与投标即被视为已经充分了解了招标方的需求中标后承担该文件范围内的所有要求投标前如对招标文件存有疑问请在投标截止日期前三个工作日以实名制书面文件向我公司询问否则视为接受已报名购买招标文件的投标商未递交投标文件或虽递交投标文件但未参加开标大会的投标商不得再参加该项目的采购活动"))
-	//return
-	//f, _ = os.OpenFile("./title.txt", os.O_RDWR|os.O_CREATE, 777)
-	//all()
-	one()
-}
-func all() {
-	m := mongodbutil.MgoFactory(3, 3, 120, "127.0.0.1:27092", "extract_kf")
-	sess := m.Get()
-	defer m.Close(sess)
-	it := sess.DB("extract_kf").C("bidding201901").Find(nil).Iter()
-	pool := make(chan bool, 5)
-	count := 0
-	for temp := make(map[string]interface{}); it.Next(&temp); {
-		pool <- true
-		count++
-		go func(d map[string]interface{}) {
-			defer func() {
-				<-pool
-			}()
-			com(d)
-		}(temp)
-		temp = make(map[string]interface{})
-		if count%200 == 0 {
-			log.Println(count)
-		}
-	}
-	log.Println("over...")
-	time.Sleep(time.Hour)
-}
-func one() {
-	m := mongodbutil.MgoFactory(3, 3, 120, "127.0.0.1:27092", "extract_kf")
-	d, _ := m.FindById("bidding_data2", "5e59321150b5ea296ed84985", extract.Fields)
-	com(*d)
-}
-func com(doc map[string]interface{}) {
-	detail := GetDetail(doc)
-	doc["detail"] = detail
-	toptype := qu.ObjToString(doc["toptype"])
-	subtype := qu.ObjToString(doc["subtype"])
-	if qu.ObjToString(doc["type"]) == "bid" {
-		toptype = "结果"
-	}
-	if toptype == "" {
-		toptype = "*"
-	}
-	e := &extract.ExtractTask{
-		TaskInfo: &extract.TaskInfo{
-			Version:     "v3.6",
-			VersionId:   "5e02be5869841446c0005a93",
-			ProcessPool: make(chan bool, 1),
-		},
-	}
-	e.Id = qu.ObjToString(ju.Config["udptaskid"])
-	e.InitTaskInfo()
-	//d.TaskInfo.FDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
-	//d.TaskInfo.TDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.ToDbAddr, ext.TaskInfo.ToDB)
-	e.InitSite()
-	e.InitRulePres()
-	e.InitRuleBacks(false)
-	//e.InitRuleBacks(true)
-	e.InitRuleCore(false)
-	//e.InitRuleCore(true)
-	e.InitBlockRule()
-	e.InitPkgCore()
-	e.InitTag(false)
-	//e.InitTag(true)
-	e.InitClearFn(false)
-	//e.InitClearFn(true)
-	if e.IsExtractCity { //版本上控制是否开始城市抽取
-		//初始化城市DFA信息
-		e.InitCityDFA()
-		e.InitAreaCode()
-		e.InitPostCode()
-	}
-	//质量审核
-	e.InitAuditFields()
-	e.InitAuditRule()
-	e.InitAuditClass()
-	e.InitAuditRecogField()
-
-	//品牌抽取是否开启
-	ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)
-
-	j := &ju.Job{
-		SourceMid:      qu.BsonIdToSId(doc["_id"]),
-		Category:       toptype,
-		CategorySecond: subtype,
-		Content:        qu.ObjToString(doc["detail"]),
-		SpiderCode:     qu.ObjToString(doc["spidercode"]),
-		Site:           qu.ObjToString(doc["site"]),
-		//Domain:     qu.ObjToString(doc["domain"]),
-		//Href:       qu.ObjToString(doc["href"]),
-		Title:     qu.ObjToString(doc["title"]),
-		Data:      &doc,
-		City:      qu.ObjToString(doc["city"]),
-		Province:  qu.ObjToString(doc["area"]),
-		Result:    map[string][]*ju.ExtField{},
-		BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
-		RuleBlock: e.RuleBlock,
-		Dataging:  qu.IntAll(doc["dataging"]),
-	}
-	e.TaskInfo.ProcessPool <- true
-	pretreated.AnalyStart(j, false, "")
-	e.ExtractProcess(j, nil, false)
-	log.Println("=============块信息================")
-	for _, v := range j.Block {
-		log.Println("----", v.Title, v.Titles, "----")
-		if v.ColonKV != nil {
-			for kk, vv := range v.ColonKV.KvTags {
-				for _, vvv := range vv {
-					log.Println("ColonKV", kk, "---", vvv.Key, "---", vvv.Value, "---", vvv.Weight)
-				}
-			}
-		}
-		if v.SpaceKV != nil {
-			for kk, vv := range v.SpaceKV.KvTags {
-				for _, vvv := range vv {
-					log.Println("SpaceKV", kk, "---", vvv.Key, "---", vvv.Value, "---", vvv.Weight)
-				}
-			}
-		}
-		if v.TableKV != nil {
-			for kk, vv := range v.TableKV.KvTags {
-				for _, vvv := range vv {
-					log.Println("TableKV", kk, "---", vvv.Key, "---", vvv.Value, "---", vvv.Weight)
-				}
-			}
-		}
-		//log.Println("Classify", v.Classify)
-		//log.Println("Tag", v.Tag)
-	}
-	log.Println("=============抽取结果================")
-	//	log.Println(e.ResultArr)
-	set := (e.ResultArr[0][1]["$set"]).(map[string]interface{})
-	for k, v := range set {
-		//if k == "budget" || k == "bidamount" || k == "winner" || k == "amount" || k == "projectname" || k == "projectcode" || k == "buyer" || k == "buyerperson" || k == "buyertel" || k == "agency" {
-		log.Println(k, "---", v)
-		//}
-	}
-	log.Println("=============抽取结果 result================")
-	return
-	for k, v := range set["result"].(map[string][]*ju.ExtField) {
-		if k != "winner" {
-			continue
-		}
-		for _, vv := range v {
-			log.Println(k, fmt.Sprintf("%+v", vv))
-			for kkk, vvv := range vv.ScoreItem {
-				log.Println("--", kkk, k, fmt.Sprintf("%+v", vvv))
-			}
-			//log.Println("\n")
-		}
-	}
-	log.Println("=============中标候选人================")
-	for _, v := range j.Winnerorder {
-		log.Println(v)
-	}
-	log.Println("=============分包================")
-	for k, v := range j.BlockPackage {
-		log.Println(k, v)
-	}
-	log.Println("=============正文================")
-	//log.Println(j.Content)
-	return
-	for _, v := range j.Block {
-		if v.ColonKV != nil && v.ColonKV.KvTags != nil {
-			for kk, vv := range v.ColonKV.KvTags {
-				for _, vvv := range vv {
-					log.Println(kk, vvv.Weight, vvv.Value)
-				}
-			}
-		}
-		if v.TableKV != nil && v.TableKV.KvTags != nil {
-			for kk, vv := range v.TableKV.KvTags {
-				for _, vvv := range vv {
-					log.Println(kk, vvv.Weight, vvv.Value)
-				}
-			}
-		}
-		if v.SpaceKV != nil && v.SpaceKV.KvTags != nil {
-			for kk, vv := range v.SpaceKV.KvTags {
-				for _, vvv := range vv {
-					log.Println(kk, vvv.Weight, vvv.Value)
-				}
-			}
-		}
-	}
-	log.Println(len(j.Block))
-	return
-	for _, v := range j.Block {
-		if m[v.Title] || v.Title == "" {
-			continue
-		}
-		if !regexp.MustCompile("或|和|以?及|与|、|或").MatchString(v.Title) {
-			//continue
-		}
-		m[v.Title] = true
-		f.WriteString(j.SourceMid + "-----" + v.Title + "---" + fmt.Sprint(v.Titles) + "\n")
-		continue
-		for _, kv := range v.ColonKV.Kvs {
-			//log.Println("\n")
-			log.Println(kv.Key, "---", kv.Value)
-			log.Println(kv.Line)
-			log.Println("=======================")
-		}
-	}
-}
-
-func GetDetail(doc map[string]interface{}) (detail string) {
-	detail = ""
-	d1, _ := doc["detail"].(string)
-	d2, _ := doc["contenthtml"].(string)
-	if len(d1) >= len(d2) || d2 == "" {
-		detail = d1
-	} else {
-		detail = d2
-	}
-	detail = ju.CutLableStr(detail)
-	detail = ju.NewCut().ClearHtml(detail)
-	tabs, ration := pretreated.ComputeConRatio(detail, 1)
-	if len(tabs) > 0 {
-		newcon, newtabs, newration := pretreated.FindBigText(detail, ration, tabs)
-		//log.Println(newcon, newtabs, newration)
-		if newcon != "" && newration == 0 {
-			detail = newcon
-			tabs = newtabs
-			ration = newration
-		}
-	}
-	return detail
-}
-func winnerorder() {
-	text := `评审专家名单:1
-吴殿波、韩屹、孙胜进、郑丹、李海波
- 
-中标标的名称、规格型号、数量、单价、服务要求:
-2019年沈阳惠涌供热有限责任公司、沈阳圣达热力供暖有限责任公司、沈阳惠盛供热有限责任公司PE管保温
-第一入围供货商:沈阳曲暖鼎盛保温安装有限公司 、总单价:11.833300
-第二入围供货商:沈阳国盛防腐保温有限公司、总单价:11.102100
-第三入围供货商:沈阳泰豪管材有限公司、总单价:13.258100`
-	log.Println((&pretreated.WinnerOrderEntity{}).Find(text, true, 1, false, ""))
-}

+ 0 - 141
src/main_test.go

@@ -1,141 +0,0 @@
-package main
-
-import (
-	"fmt"
-	"jy/admin/track"
-	"jy/clear"
-	"jy/extract"
-	. "jy/mongodbutil"
-	"log"
-	"os"
-	"qfw/util"
-	"regexp"
-	"strconv"
-	"strings"
-	"testing"
-	"time"
-)
-
-func Test_han(t *testing.T) {
-	str := `[\u4e00-\u9fa5]` //"[\u4e00-\u9fa5]"
-	//var rg = regexp.MustCompile(`[\u4e00-\u9fa5]`)会出错
-	if strings.Contains(str, "\\u") {
-		pattern, _ := strconv.Unquote(`"` + str + `"`)
-		log.Println(pattern)
-	}
-	var rg = regexp.MustCompile(str)
-	fmt.Println(rg.MatchString(str))
-	os.Exit(0)
-}
-func Test_task(t *testing.T) {
-	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_kf")
-	//extract.StartExtractTaskId("5b8f804025e29a290415aee1")5c528686698414055c47b115
-	//extract.StartExtractTestTask("5e103206234ddc34b406c5d1", "5df59ee3e9d1f601e46fc3f9", "1", "mxs_v1", "mxs_v1")
-	extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5df50776e9d1f601e4964179", "1", "mxs_v1", "mxs_v2")
-	//extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
-	time.Sleep(5 * time.Second)
-}
-func Test_extractcity(t *testing.T) {
-	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
-	extract.InitDFA2()
-	//查询采购单位信息
-	extract.FindBuyer()
-}
-func Test_reg(t *testing.T) {
-	reg1, _ := regexp.Compile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,4})?|\\d{3,5}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{3,})?|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")
-	log.Println("---", reg1.FindAllString("05939-5365001(兰陵县芦柞镇人民政府)", -1))
-	reg2, _ := regexp.Compile("^\\d*[×―—-\\-]*[\u3000\u2003\u00a0\\s]*\\d*$")
-	log.Println("---", reg2.MatchString("张女士/"))
-	filterK := regexp.MustCompile("[((\\[【].*?[))\\]】]|<[^>].+?>|[①②③¥·;;‘“'’”,*<>((\\[【、))/\\]】??,。.\".\\s\u3000\u2003\u00a0]+|^[一二三四五六七八九十0-91234567890]+")
-	log.Println(filterK.FindString("二)采购项目联系人(代理机构)"))
-}
-
-func Test_reg1(t *testing.T) {
-	context := `sss<input  name="AgentCode" size="30" maxsize="50" value="91370800688271668P" class="textbox">
-    dfdf<input type="hidden" name="AgentCode" size="30" maxsize="50" value="tttt" class="textbox"></input>`
-	reg := regexp.MustCompile(`<\s*input.*value=['"](.[^'"]+).+>`)
-	tmp := reg.ReplaceAllString(context, "$1")
-	log.Println(tmp)
-}
-
-func Test_paths(t *testing.T) {
-	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_v3")
-	tracks := track.GetTrackPath("5b8dd276a5cb26b9b7faaa7c", "projectname", "rack_v3", "result_v3")
-	for code, v := range tracks {
-		if tmp, ok := v.([]map[string]interface{}); ok {
-			for k, v := range tmp {
-				if k == 0 {
-					log.Println(v)
-				} else {
-					log.Println(code, v["code"], v["value"])
-				}
-			}
-		}
-		break
-	}
-}
-
-func Test_clear(t *testing.T) {
-	value := "法拉(盛(客{)户)端副科级沙发俩括号的"
-	log.Println("pre---", value)
-	startChars := []string{"[((]", "[\\[【]", "[{{]", "[<《]", "〔"}
-	endChars := []string{"[))]", "[\\]】]", "[}}]", "[>》]", "〕"}
-	for k, v := range startChars {
-		sReg := regexp.MustCompile(v)
-		eReg := regexp.MustCompile(endChars[k])
-		sIndex := sReg.FindAllStringIndex(value, -1)
-		eIndex := eReg.FindAllStringIndex(value, -1)
-		sCount := len(sIndex)
-		eCount := len(eIndex)
-		if sCount == eCount {
-			continue
-		}
-		log.Println("value1---", value, sCount, eCount)
-		//清理前面
-		if sCount > eCount {
-			value = value[sIndex[eCount][1]:]
-		}
-		log.Println("value2---", value)
-		//清理后面
-		if sCount < eCount {
-			value = value[:eIndex[sCount][0]]
-		}
-		log.Println("value3---", value)
-	}
-	log.Println("value4---", value)
-	//交叉出现情况处理
-	sReplReg := regexp.MustCompile("[((\\[【{{〔<《][^))\\]】}}〕>》]*$")
-	eReplReg := regexp.MustCompile("^[^((\\[【{{〔<《]*[))\\]】}}〕>》]")
-	if sReplReg.MatchString(value) || eReplReg.MatchString(value) {
-		value = sReplReg.ReplaceAllString(value, "")
-		value = eReplReg.ReplaceAllString(value, "")
-		//value = fmt.Sprint(childCutNotPrs([]interface{}{value, data[1]}, count+1)[0])
-	}
-	log.Println("result---", value)
-}
-
-func Test_buyer(t *testing.T) {
-	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_kf")
-	demo, _ := Mgo.Find("demo_data", nil, `{"_id:1"}`, `{"buyer":1,"title":1}`, false, -1, -1)
-	result, _ := Mgo.Find("mxs_buyer", nil, `{"_id:1"}`, `{"buyer":1}`, false, -1, -1)
-	for _, d := range *demo {
-		id1 := util.BsonIdToSId(d["_id"])
-		buyer1 := util.ObjToString(d["buyer"])
-		//title := util.ObjToString(d["title"])
-		for _, r := range *result {
-			id2 := util.BsonIdToSId(r["_id"])
-			buyer2 := util.ObjToString(r["buyer"])
-			if id1 == id2 {
-				if buyer1 != buyer2 {
-					util.Debug(id1, buyer1, buyer2)
-				}
-				break
-			}
-		}
-	}
-}
-
-func Test_util1(t *testing.T) {
-	data := clear.CutSymbol([]interface{}{"----------123123", "-----123123"})
-	fmt.Println(data)
-}

+ 5 - 1
src/res/fieldscore.json

@@ -182,7 +182,7 @@
         "positivewords": [
             {
                 "describe": "以*结尾",
-                "regstr": ".{2,100}(总站|委员会|管委会|联合会|联合体|医院|卫计委|机关|社区|中心|中心校|分校|办公室|学校|幼儿园|动物园|管理站|馆|基地|青年宫|少年宫|艺术宫|电视台|协会|政府|[初高]中|集团|银行|[大中小]学|院|厂|店|段|场|社|室|部|厅|局|处|所|队|公司|监狱|监测站|血站|检查站|工作站|供应站|分行|文明办)$",
+                "regstr": ".{2,100}(总站|委员会|管委会|联合会|联合体|医院|卫计委|机关|社区|中心站|中心|中心校|分校|办公室|学校|幼儿园|动物园|管理站|馆|基地|青年宫|少年宫|艺术宫|电视台|协会|政府|[初高]中|集团|银行|[大中小]学|院|厂|店|段|场|社|室|部|厅|局|处|所|队|公司|监狱|监测站|血站|检查站|工作站|供应站|分行|文明办)$",
                 "score": 10
             }
         ],
@@ -256,6 +256,10 @@
                 "describe": "关键词",
                 "regstr": "(牵头方|联合体)",
                 "score": 3
+            },{
+                "describe": "多个公司",
+                "regstr": "(.*公司[;,,;、]){3,}",
+                "score": 5
             }
         ],
         "negativewords": [

+ 10 - 6
src/res/tablev1.json

@@ -2,9 +2,10 @@
 	"normalhead":[
 		"^((.{2,6}(描述|名称|编号|代码|时间|类型|性质|行政区域|原因|意见|须知|程度))|标段(编号)?|招标金额|规模|统一社会信用代码|拟?中标供应商|质量|(质量)?承诺|地址|招标代理|序号|材料|结构|结构层数|评委|单位|数量|排名|标的|标项|开户银行|邮编|账号|电话|传真|网址|得分|名次|包件?号|职务|(建设|招标|采购|中标|成交|甲|乙)(单位|人|供应商|方|规模).{0,2}|.{0,5}(价格?|额|资金|[预概]算|投资|费用|报价|投标价)[(]?(万?元?([大小]写)?)[)]?)$__M",
 		"^.{0,7}(((单位)?名称|总监|经理|负责人|信息|率|费|期|人|号|码|(价格?|额|资金)(万?元?([大小]写)?)|员|品目|标包|代表|区域|方式|因素|合价|合计|小计|地点|条件|(资质|类别和)等级|类别|状态)|得分|注册专业|方法|家数|全称|简称|邮件|执业或职业资格|证书|部门|事项|来源|划分|长度|规模|保证金|目标)$__",
-		"(名单|证号|名称|要求|时间|日期|地点|单位|条款|机构|范围|情况|概况|品名|规格|参数|标准|指标|型号|限价|数量|方式|等级|依据|明细|概况|内容|次数|产品|性质|地区|地址|币种|主题|详情|说明|代理(公司|机构)|节支率|名单|结果|结果公示)$|^(职称|姓名|级别|职称专业|证书名称|证书编号)$__",
+		"(名单|证号|名称|要求|时间|日期|地点|单位|条款|机构|范围|情况|概况|品名|规格|参数|标准|指标|型号|限价|数量|方式|等级|依据|明细|概况|内容|次数|产品|性质|地区|地址|币种|主题|详情|说明|代理(公司|机构)|节支率|名单|结果|结果公示)$|^(职称|姓名|级别|职称专业)$__",
 		"^(包号|联系|评标|单位|公告|采购|商品|附件|质保|用途|公示|机构|评审|品名|规格|参数|指标|型号|数量|证书).{0,10}$__",
-		"(专家|评委|打分|附件材料)$__",
+		"(专家|评委|[打得]分|附件材料)[a-zA-Z0-9]*$__M",
+		"(基本需求.{0,15}|.*联系方式|总计|包组|证书名称|证书编号|合同包|排序|二级建造师|项目负责人及资格证书编号)__M",
 		"品牌",
         "姓名",
 		"起讫桩号",
@@ -28,6 +29,7 @@
 		"(\\W{2,10}(名称|参数[及]?要求))$__M"
 	],
 	"jghead":[
+		"(报价|(元(/人|/间/天))|中转|航延|代理机构名称|地址和联系方式|联系电话|项目负责人及注册号|不含税预算金额\n(万元))__M",
 		"^.{0,2}[预拟]?(成交|中标|候选)(供应商|单位|企业|人|机构|价|金额).{0,2}$__M",
 		"^.{0,6}[打得评总](分)$__",
 		"(中标|磋商|投标|报|成交)总?(价|金额)__",
@@ -37,12 +39,13 @@
 	    "候选",
 		"业绩",
 		"荣誉",
-	    "排序",
 	    "排名",
 		"中标",
-		"供应商"
+		"供应商",
+		"详见附件及谈判、报价文件"
 	],
 	"con":[
+		"(是否通过资格|比例|评分结果|中标金额|质量目标|安全目标|承诺工期|推荐顺序|参考对象|工程质量$|工期[((]交货期[))]|合作伙伴|(包名|中标(候选人|供应商).{0,5}|第[一二三四五六七八九十]+中标候选人)[::]?[\\s]*)__M",
 		"^((子|合同|分|施工|监理)?(标段?|包|合同段|标包|序号)[a-zA-Z0-9\\-一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+)__$1",
 		"([a-zA-Z0-9\\-一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+(子|合同|分|施工|监理)?(标段?|包|合同段|标包|号))$__$1",
 		"(^[a-zA-Z0-9\\-一二三四五六七八九十ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ]+$)__$1",
@@ -50,7 +53,9 @@
 		"(^.{5,}(公司))__",
 		".{2,20}元整|[\\d]+万?元__",
 		".{4,}采购(项目)?__",
-		"(首选|第[一二三四五1-5])(顺序|推荐)?(中标|候选|成交)?(候选)?(人|单位|供应商)__BO"
+		"(首选|第[一二三四五1-5])(顺序|推荐)?(承包|中标|候选|成交)?(候选)?(人|单位|供应商)__M",
+		"(招单价|无供应商报价|全部内容|计量单位|符合国家及行业标准的合格工程|最终报价[0-9,.,。万元()]*|二级建造师|公示信息|[甲乙丙]级)__",
+		"^采购包[0-9]+$__"
 	],
 	"abandontable":[
 		"(磋商|谈判|评标(委员会)?)?((小组)?成员|(评审)?专家)(名单)?$__",
@@ -61,7 +66,6 @@
 		"落标供应商及落标原因",
 		"被废标供应商名称",
 		"主要人员",
-		"其他投标人",
 		"年估算额年(万元)"
 	],
 	"bidorder":[

+ 5 - 5
udpcreateindex/src/main.go

@@ -46,8 +46,8 @@ var (
 
 func init() {
 	util.ReadConfig(&Sysconfig)
-	inits()
-	go checkMapJob()
+	//inits()
+	//go checkMapJob()
 	detailLength = util.IntAllDef(Sysconfig["detaillength"], 50000)
 	fileLength = util.IntAllDef(Sysconfig["filelength"], 50000)
 	updport, _ = Sysconfig["updport"].(string)
@@ -65,7 +65,7 @@ func init() {
 		MongodbAddr: mconf["addr"].(string),
 		Size:        util.IntAllDef(mconf["pool"], 5),
 		DbName:      mconf["db"].(string),
-		ReplSet:     "bidding",
+		//ReplSet:     "bidding",
 	}
 	mgo.InitPool()
 
@@ -101,7 +101,7 @@ func init() {
 	}
 	mgostandard = &mongodb.MongodbSim{
 		MongodbAddr: standard["addr"].(string),
-		ReplSet:     "bidding",
+		//ReplSet:     "bidding",
 		Size:        util.IntAllDef(standard["pool"], 5),
 		DbName:      standard["db"].(string),
 	}
@@ -182,7 +182,7 @@ func init() {
 }
 
 func main() {
-	go task_index()
+	//go task_index()
 	updport := Sysconfig["udpport"].(string)
 	udpclient = mu.UdpClient{Local: updport, BufSize: 1024}
 	udpclient.Listen(processUdpMsg)

+ 7 - 9
udps/main.go

@@ -10,8 +10,6 @@ import (
 	qutil "qfw/util"
 	"qfw/util/mongodb"
 	"time"
-
-	"gopkg.in/mgo.v2/bson"
 )
 
 var startDate, endDate string
@@ -32,13 +30,13 @@ func main() {
 	flag.StringVar(&q, "q", "", "q查询语句\"{'':''}\",有q就不要gtid,lteid")
 	flag.StringVar(&param, "param", "", "param,生信息发布或其他索引时用双引号套单引号\"{'mgoaddr':'','d':'','c':'','index':'','type':''}\"")
 	flag.Parse()
-	if startDate != "" || endDate != "" {
-		start, _ := time.ParseInLocation(qutil.Date_Short_Layout, startDate, time.Local)
-		end, _ := time.ParseInLocation(qutil.Date_Short_Layout, endDate, time.Local)
-		id1 = qutil.BsonIdToSId(bson.NewObjectIdWithTime(start))
-		id2 = qutil.BsonIdToSId(bson.NewObjectIdWithTime(end))
-		log.Println(id1, id2)
-	}
+	//if startDate != "" || endDate != "" {
+	//	start, _ := time.ParseInLocation(qutil.Date_Short_Layout, startDate, time.Local)
+	//	end, _ := time.ParseInLocation(qutil.Date_Short_Layout, endDate, time.Local)
+	//	id1 = qutil.BsonIdToSId(bson.NewObjectIdWithTime(start))
+	//	id2 = qutil.BsonIdToSId(bson.NewObjectIdWithTime(end))
+	//	log.Println(id1, id2)
+	//}
 	if ip != "" && p > 0 && ((id1 != "" && id2 != "") || (q != "" || tmptime > 0)) {
 		toadd := &net.UDPAddr{
 			IP:   net.ParseIP(ip),

Some files were not shown because too many files changed in this diff