瀏覽代碼

中标预测抽取

zhangjinkun 4 年之前
父節點
當前提交
32e815c3ce
共有 3 個文件被更改,包括 86 次插入和 20 次删除
  1. 6 6
      src/jy/extract/extract.go
  2. 5 2
      src/jy/extract/extractInit.go
  3. 75 12
      src/jy/extract/extractudp.go

+ 6 - 6
src/jy/extract/extract.go

@@ -214,9 +214,9 @@ func RunExtractTask(taskId string) {
 			//if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
 			//	continue
 			//}
-			if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl"{ //临时
+			if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" { //临时
 				continue
-			}			//根据标题判断是否抽取
+			} //根据标题判断是否抽取
 			b := IsExtract("title", qu.ObjToString(v["title"]), "")
 			if !b {
 				continue
@@ -1817,7 +1817,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		if len(j.Winnerorder) > 0 { //候选人信息
 			for i, v := range j.Winnerorder {
 				if v["price"] != nil {
-					j.Winnerorder[i]["price"] = clear.ObjToMoney([]interface{}{v["price"], ""},j.SpiderCode)[0]
+					j.Winnerorder[i]["price"] = clear.ObjToMoney([]interface{}{v["price"], ""}, j.SpiderCode)[0]
 				}
 			}
 			tmp["winnerorder"] = j.Winnerorder
@@ -1959,7 +1959,8 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			tmp["projectname"] = j.Title
 		}
 		tmp["repeat"] = 0
-
+		e.ResultTmp = tmp
+		e.ResultFTmp = ffield
 		if e.TaskInfo.TestColl == "" {
 			if len(tmp) > 0 { //保存抽取结果
 				/*	if len(e.SiteFields) <= 0 {
@@ -2040,7 +2041,7 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 	delete(tmp, "detail")
 	if _, ok := tmp["bidamount"].(string); ok {
 		delete(tmp, "bidamount")
-	} else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && fb/100 > qu.Float64All(tmp["budget"]){
+	} else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && fb/100 > qu.Float64All(tmp["budget"]) {
 		delete(tmp, "bidamount")
 	}
 	if _, ok := tmp["budget"].(string); ok {
@@ -2404,4 +2405,3 @@ func RemoveReplicaSliceString(slc []string) []string {
 	}
 	return result
 }
-

+ 5 - 2
src/jy/extract/extractInit.go

@@ -20,8 +20,8 @@ import (
 
 type RegLuaInfo struct {
 	//正则或脚本信息
-	Code, Name, Field string  //
-	Score 			  float64
+	Code, Name, Field string //
+	Score             float64
 	RuleText          string  //
 	IsLua             bool    //
 	RegPreBac         *ExtReg //
@@ -144,6 +144,9 @@ type ExtractTask struct {
 	Seg_SV              *gse.Segmenter //分词
 	Luacodes            *sync.Map      //站点规则
 	SiteMerge           *sync.Map      //抽取合并
+
+	ResultTmp  map[string]interface{} //最终抽取结果
+	ResultFTmp map[string]interface{} //最终抽取结果(附件)
 }
 
 type SiteCity struct {

+ 75 - 12
src/jy/extract/extractudp.go

@@ -160,13 +160,13 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				}
 				var j, jf *ju.Job
 				var isSite bool
-				if ext.IsFileField && (v["projectinfo"] != nil ||v["attach_text"] != nil ){
+				if ext.IsFileField && (v["projectinfo"] != nil || v["attach_text"] != nil) {
 					v["isextFile"] = true
-					j, jf,isSite = ext.PreInfo(v)
+					j, jf, isSite = ext.PreInfo(v)
 				} else {
-					j, _,isSite = ext.PreInfo(v)
+					j, _, isSite = ext.PreInfo(v)
 				}
-				go ext.ExtractProcess(j, jf,isSite)
+				go ext.ExtractProcess(j, jf, isSite)
 				index++
 				ext.TaskInfo.ProcessPool <- true
 			}
@@ -177,13 +177,13 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				}
 				var j, jf *ju.Job
 				var isSite bool
-				if ext.IsFileField && (v["projectinfo"] != nil ||v["attach_text"] != nil ){
+				if ext.IsFileField && (v["projectinfo"] != nil || v["attach_text"] != nil) {
 					v["isextFile"] = true
-					j, jf,isSite = ext.PreInfo(v)
+					j, jf, isSite = ext.PreInfo(v)
 				} else {
-					j, _,isSite = ext.PreInfo(v)
+					j, _, isSite = ext.PreInfo(v)
 				}
-				go ext.ExtractProcess(j, jf,isSite)
+				go ext.ExtractProcess(j, jf, isSite)
 				index++
 				ext.TaskInfo.ProcessPool <- true
 			}
@@ -230,18 +230,18 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				_id := qu.BsonIdToSId(v["_id"])
 				var j, jf *ju.Job
 				var isSite bool
-				if ext.IsFileField && (v["projectinfo"] != nil ||v["attach_text"] != nil ){
+				if ext.IsFileField && (v["projectinfo"] != nil || v["attach_text"] != nil) {
 					v["isextFile"] = true
-					j, jf,isSite = ext.PreInfo(v)
+					j, jf, isSite = ext.PreInfo(v)
 				} else {
-					j, _,isSite = ext.PreInfo(v)
+					j, _, isSite = ext.PreInfo(v)
 				}
 				ext.TaskInfo.ProcessPool <- true
 				wg.Add(1)
 				go func(wg *sync.WaitGroup, j, jf *ju.Job) {
 					defer wg.Done()
 					//log.Debug(index,j.SourceMid,)
-					ext.ExtractProcess(j, jf,isSite)
+					ext.ExtractProcess(j, jf, isSite)
 				}(&wg, j, jf)
 				index++
 				if index%1000 == 0 {
@@ -258,3 +258,66 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 		log.Debug("抽取完成,", "count:", count, ",index:", index, ",bidtotal:", ext.BidTotal, ",eid:", eid)
 	}
 }
+
+// exF is the extraction task reused by every ExtractByBidForecast call. It is
+// built on the first call and kept for the lifetime of the process.
+// NOTE(review): the lazy initialisation below is not synchronised — confirm
+// that ExtractByBidForecast is never called concurrently.
+var exF *ExtractTask
+
+// ExtractByBidForecast runs the extraction pipeline on a single record for
+// bid-winner forecasting.
+//
+// infoid is the id of the record, looked up in exF.TaskInfo.FromColl.
+// ossid is described by the original author as the id produced by attachment
+// recognition; it is accepted but not read anywhere in this function.
+//
+// The function currently always returns nil and only prints exF.ResultTmp and
+// exF.ResultFTmp.
+// NOTE(review): presumably the extracted fields are meant to be returned to
+// the caller — confirm the intended return value.
+func ExtractByBidForecast(infoid string, ossid ...string) map[string]interface{} {
+	defer qu.Catch()
+	if exF == nil {
+		// One-time setup: load the task configured under "udptaskid" together
+		// with its database handles and rule sets.
+		exF = &ExtractTask{}
+		exF.Id = qu.ObjToString(ju.Config["udptaskid"])
+		exF.InitTaskInfo()
+		exF.TaskInfo.FDB = db.MgoFactory(1, 2, 600, exF.TaskInfo.FromDbAddr, exF.TaskInfo.FromDB)
+		exF.TaskInfo.TDB = db.MgoFactory(1, 2, 600, exF.TaskInfo.ToDbAddr, exF.TaskInfo.ToDB)
+		exF.InitSite()
+		exF.InitRulePres()
+		exF.InitRuleBacks(false)
+		exF.InitRuleBacks(true)
+		exF.InitRuleCore(false)
+		exF.InitRuleCore(true)
+		exF.InitBlockRule()
+		exF.InitPkgCore()
+		exF.InitTag(false)
+		exF.InitTag(true)
+		exF.InitClearFn(false)
+		exF.InitClearFn(true)
+		if exF.IsExtractCity { // city extraction is switched per task version
+			// Initialise city DFA information.
+			//exF.InitCityDFA()
+			exF.InitCityInfo()
+			exF.InitAreaCode()
+			exF.InitPostCode()
+		}
+		// Quality audit.
+		exF.InitAuditFields()
+		exF.InitAuditRule()
+		exF.InitAuditClass()
+		exF.InitAuditRecogField()
+
+		// Whether brand/goods extraction is enabled.
+		ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)
+
+		exF.ResultSave(true)
+		exF.BidSave(true)
+		exF.IsRun = true
+		exF.InitFile()
+	}
+	tmp, _ := exF.TaskInfo.FDB.FindById(exF.TaskInfo.FromColl, infoid, nil)
+	// The lookup result used to be dereferenced unconditionally. Bail out
+	// before taking a ProcessPool slot when nothing usable came back, instead
+	// of panicking on a nil pointer or extracting from an empty document.
+	if tmp == nil || len(*tmp) == 0 {
+		fmt.Println("ExtractByBidForecast: record not found, infoid:", infoid)
+		return nil
+	}
+	doc := *tmp
+	if exF.IsFileField && (doc["projectinfo"] != nil || doc["attach_text"] != nil) {
+		// Mark the record so that attachment content is extracted as well.
+		doc["isextFile"] = true
+	}
+	exF.TaskInfo.ProcessPool <- true
+	j, jf, _ := exF.PreInfo(doc)
+	wg := sync.WaitGroup{}
+	wg.Add(1)
+	go func(wg *sync.WaitGroup, j, jf *ju.Job) {
+		defer wg.Done()
+		exF.ExtractProcess(j, jf, false)
+	}(&wg, j, jf)
+	wg.Wait()
+	exF.BidSave(false)
+	fmt.Println(exF.ResultTmp)
+	fmt.Println(exF.ResultFTmp)
+	return nil
+}