Ver Fonte

Merge branch 'dev3.4' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.4

apple há 5 anos atrás
pai
commit
d47a1186ca

+ 1 - 1
src/config.json

@@ -28,7 +28,7 @@
     "filelength": 100000,
     "iscltlog": false,
     "brandgoods": false,
-    "udptaskid": "5cdd3021698414032c8322b1",
+    "udptaskid": "5cdd3025698414032c8322b1",
     "udpport": "1484",
     "nextNode": [
         {

+ 34 - 32
src/jy/admin/audit/dataaudit.go

@@ -10,6 +10,7 @@ import (
 	qu "qfw/util"
 	"qfw/util/elastic"
 	redis "qfw/util/redis"
+	"strconv"
 	"strings"
 	"time"
 
@@ -70,16 +71,17 @@ func AuditOneField(c *gin.Context) {
 	start := c.GetInt("start")
 	limit := c.GetInt("length")
 	auditattr, _ := c.GetPostForm("auditattr")
+	check, _ := strconv.Atoi(auditattr)
 	query := map[string]interface{}{}
-	if auditattr != "-1" {
+	if check != -1 {
 		query = map[string]interface{}{
-			field + "_" + auditattr: 1,
+			"check": check,
 		}
 	} else {
 		query = map[string]interface{}{
 			"$or": []interface{}{
-				map[string]interface{}{field + "_ok": 1},
-				map[string]interface{}{field + "_err": 1},
+				map[string]interface{}{"check": 1},
+				map[string]interface{}{"check": 0},
 			},
 		}
 	}
@@ -107,24 +109,24 @@ func AllAudit(c *gin.Context) {
 	} else { //批量审核
 		SaveDb := ""
 		FieldBd := 0
-		ElasticClientIndex := ""
-		ElasticClientType := ""
+		// ElasticClientIndex := ""
+		// ElasticClientType := ""
 		RedisName := util.QYK_RedisName
 		if field == "winner" {
 			SaveDb = util.ElasticClientDB
 			FieldBd = util.WinnerDB
-			ElasticClientIndex = util.ElasticClientIndex
-			ElasticClientType = util.ElasticClientType
+			// ElasticClientIndex = util.ElasticClientIndex
+			// ElasticClientType = util.ElasticClientType
 		} else if field == "buyer" {
 			SaveDb = util.ElasticClientBuyerDB
 			FieldBd = util.BuyerDB
-			ElasticClientIndex = util.ElasticClientBuyerIndex
-			ElasticClientType = util.ElasticClientBuyerType
+			// ElasticClientIndex = util.ElasticClientBuyerIndex
+			// ElasticClientType = util.ElasticClientBuyerType
 		} else {
 			SaveDb = util.ElasticClientAgencyDB
 			FieldBd = util.AgencyDB
-			ElasticClientIndex = util.ElasticClientAgencyIndex
-			ElasticClientType = util.ElasticClientAgencyType
+			// ElasticClientIndex = util.ElasticClientAgencyIndex
+			// ElasticClientType = util.ElasticClientAgencyType
 		}
 		//redis
 		qykredis := redis.RedisPool[RedisName].Get()
@@ -156,11 +158,11 @@ func AllAudit(c *gin.Context) {
 						return
 					}
 				}
-				_, err := escon.Index().Index(ElasticClientIndex).Type(ElasticClientType).Id(sid).BodyJson(e).Refresh(true).Do()
-				if err != nil {
-					c.JSON(200, gin.H{"rep": false, "msg": "更新es错误"})
-					return
-				}
+				// _, err := escon.Index().Index(ElasticClientIndex).Type(ElasticClientType).Id(sid).BodyJson(e).Refresh(true).Do()
+				// if err != nil {
+				// 	c.JSON(200, gin.H{"rep": false, "msg": "更新es错误"})
+				// 	return
+				// }
 			}
 			//删除标记数据
 			query := map[string]interface{}{
@@ -177,8 +179,8 @@ func AllAudit(c *gin.Context) {
 func DataSave(c *gin.Context) {
 	SaveDb := ""
 	FieldBd := 0
-	ElasticClientIndex := ""
-	ElasticClientType := ""
+	// ElasticClientIndex := ""
+	// ElasticClientType := ""
 	RedisName := util.QYK_RedisName
 	//企业名称
 	e := make(map[string]interface{})
@@ -188,8 +190,8 @@ func DataSave(c *gin.Context) {
 	if field == "winner" {
 		SaveDb = util.ElasticClientDB
 		FieldBd = util.WinnerDB
-		ElasticClientIndex = util.ElasticClientIndex
-		ElasticClientType = util.ElasticClientType
+		// ElasticClientIndex = util.ElasticClientIndex
+		// ElasticClientType = util.ElasticClientType
 		capital, _ := c.GetPostForm("capital")
 		capitalfloat := clear.ObjToMoney([]interface{}{capital, ""})[0]
 		business_scope, _ := c.GetPostForm("business_scope")
@@ -200,8 +202,8 @@ func DataSave(c *gin.Context) {
 	} else if field == "buyer" {
 		SaveDb = util.ElasticClientBuyerDB
 		FieldBd = util.BuyerDB
-		ElasticClientIndex = util.ElasticClientBuyerIndex
-		ElasticClientType = util.ElasticClientBuyerType
+		// ElasticClientIndex = util.ElasticClientBuyerIndex
+		// ElasticClientType = util.ElasticClientBuyerType
 		buyerclass, _ := c.GetPostForm("buyerclass")
 		ranks, _ := c.GetPostForm("ranks")
 		buyer_type, _ := c.GetPostForm("type")
@@ -213,8 +215,8 @@ func DataSave(c *gin.Context) {
 	} else {
 		SaveDb = util.ElasticClientAgencyDB
 		FieldBd = util.AgencyDB
-		ElasticClientIndex = util.ElasticClientAgencyIndex
-		ElasticClientType = util.ElasticClientAgencyType
+		// ElasticClientIndex = util.ElasticClientAgencyIndex
+		// ElasticClientType = util.ElasticClientAgencyType
 		ranks, _ := c.GetPostForm("ranks")
 		agency_type, _ := c.GetPostForm("type")
 		e["ranks"] = ranks
@@ -279,13 +281,13 @@ func DataSave(c *gin.Context) {
 				return
 			}
 		}
-		escon := elastic.GetEsConn()
-		defer elastic.DestoryEsConn(escon)
-		_, err := escon.Index().Index(ElasticClientIndex).Type(ElasticClientType).Id(sid).BodyJson(e).Refresh(true).Do()
-		if err != nil {
-			c.JSON(200, gin.H{"rep": false, "msg": "更新es错误"})
-			return
-		}
+		// escon := elastic.GetEsConn()
+		// defer elastic.DestoryEsConn(escon)
+		// _, err := escon.Index().Index(ElasticClientIndex).Type(ElasticClientType).Id(sid).BodyJson(e).Refresh(true).Do()
+		// if err != nil {
+		// 	c.JSON(200, gin.H{"rep": false, "msg": "更新es错误"})
+		// 	return
+		// }
 	}
 	//删除标记数据
 	coll, _ := c.GetPostForm("coll")

+ 13 - 9
src/jy/extract/extract.go

@@ -27,12 +27,12 @@ import (
 var (
 	lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
 
-	cut     = ju.NewCut()                          //获取正文并清理
-	ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask          //任务列表
-	ClearTaskList map[string]*ClearTask            //清理任务列表
-	saveLimit     = 100                            //抽取日志批量保存
-	PageSize      = 5000                           //查询分页
+	cut           = ju.NewCut()                          //获取正文并清理
+	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList      map[string]*ExtractTask                //任务列表
+	ClearTaskList map[string]*ClearTask                  //清理任务列表
+	saveLimit     = 100                                  //抽取日志批量保存
+	PageSize      = 5000                                 //查询分页
 	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -583,7 +583,11 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 		//函数清理
 		for key, val := range j.Result {
 			for i, v := range val {
-				//qu.Debug(key, v.Value)
+				// if v.ExtFrom == "title"&& v.Field == "buyer"{
+				// 	qu.Debug("title---",v.Value)
+				// }else if v.Field == "buyer"{
+				// 	qu.Debug("text---",v.Value)
+				// }
 				lockclear.Lock()
 				var cfn = []string{}
 				if isSite {
@@ -1940,7 +1944,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					tmp["epackage"] = string(bs)
 				}
 			}
-			//tmp["result"] = result
+			tmp["result"] = result
 			tmp["resultf"] = resultf
 			b := db.Mgo.Update(e.TaskInfo.TestColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false)
 			if !b {
@@ -2130,7 +2134,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 { //reids未找到,执行规则匹配
+	if i == 0 {                            //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库

+ 22 - 0
src/main_test.go

@@ -7,6 +7,7 @@ import (
 	. "jy/mongodbutil"
 	"log"
 	"os"
+	"qfw/util"
 	"regexp"
 	"strconv"
 	"strings"
@@ -111,3 +112,24 @@ func Test_clear(t *testing.T) {
 	}
 	log.Println("result---", value)
 }
+
+// Test_buyer compares the "buyer" value of every demo_data document with the
+// "buyer" value of the mxs_buyer document that has the same _id, and logs each
+// mismatch through util.Debug.
+//
+// It is a manual comparison aid: it needs the MongoDB instance configured
+// below and makes no assertions about the compared values.
+func Test_buyer(t *testing.T) {
+	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_kf")
+	// NOTE(review): the third argument `{"_id:1"}` is not valid JSON (probably
+	// meant `{"_id":1}`); kept unchanged — confirm how Find treats it.
+	demo, _ := Mgo.Find("demo_data", nil, `{"_id:1"}`, `{"buyer":1,"title":1}`, false, -1, -1)
+	result, _ := Mgo.Find("mxs_buyer", nil, `{"_id:1"}`, `{"buyer":1}`, false, -1, -1)
+	// Both results are dereferenced below; fail the test instead of panicking
+	// on a nil pointer when a query did not return a result set.
+	if demo == nil || result == nil {
+		t.Fatal("Test_buyer: query on demo_data or mxs_buyer returned no result set")
+	}
+	for _, d := range *demo {
+		id1 := util.BsonIdToSId(d["_id"])
+		buyer1 := util.ObjToString(d["buyer"])
+		for _, r := range *result {
+			id2 := util.BsonIdToSId(r["_id"])
+			if id1 != id2 {
+				continue
+			}
+			// Only the first document with a matching id is compared.
+			if buyer2 := util.ObjToString(r["buyer"]); buyer1 != buyer2 {
+				util.Debug(id1, buyer1, buyer2)
+			}
+			break
+		}
+	}
+}

+ 14 - 5
src/res/fieldscore.json

@@ -18,6 +18,14 @@
                 "regexp": 1,
                 "kvweight": 1
             },
+            "buyer":{
+            	"title": 0,
+				"table": 5,
+                "colon": 5,
+                "space": 5,
+                "regexp": 3,
+                "kvweight": 3
+            },
             "winner": {
                 "table": 3,
                 "colon": 3,
@@ -147,15 +155,16 @@
         "positivewords": [
             {
                 "describe": "以*结尾",
-                "regstr": ".{2,100}(委员会|中心|分校|办公室|学校|幼儿园|动物园|管理站|图书馆|殡仪馆|博物馆|基地|青年宫|少年宫|艺术宫|电视台|协会|政府|初中|集团|银行|[大中小]学|院|厂|店|段|场|社|室|部|厅|局|处|所|队|公司|监狱|监测站|血站|检查站)$",
-                "score": 3
+                "regstr": ".{2,100}(委员会|管委会|医院|卫计委|机关|社区|中心|中心校|分校|办公室|学校|幼儿园|动物园|管理站|馆|基地|青年宫|少年宫|艺术宫|电视台|协会|政府|[初高]中|集团|银行|[大中小]学|院|厂|店|段|场|社|室|部|厅|局|处|所|队|公司|监狱|监测站|血站|检查站|工作站)$",
+                "score": 10
             }
         ],
         "negativewords": [
             {
                 "describe": "包含负分",
                 "regstr": "(标人|附件|委托|认证|代理|咨询|顾问|管理有限公司|管理顾问|招标失败|交易中心|不足|公告|变更|招标|废标|废止|流标|中标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
-                "score": -5
+                "score": -20
             },
             {
                 "describe": "包含负分不再展示",
@@ -174,7 +183,7 @@
                 "range": [
                     0,
                     3,
-                    -5
+                    -20
                 ]
             },
             {
@@ -190,7 +199,7 @@
                 "range": [
                     25,
                     -1,
-                    -1
+                    -5
                 ]
             }
         ]

+ 2 - 2
src/web/templates/admin/audit_auditone.html

@@ -154,8 +154,8 @@ $(function () {
 	});
 	ttable.on('init.dt', function () {
 		var opt="<option value='-1'>全部</option>"+
-				"<option value='ok'>正确</option>"+
-				"<option value='err'>异常</option>";
+				"<option value='1'>正确</option>"+
+				"<option value='0'>异常</option>";
 		var select="<div class='form-group'><label for='name'>数据类型:</label>"+
 			"<select id='auditattr' onchange='checkclick(this.value)' class='form-control input-sm'>"+
 			opt+