zhangjinkun 5 yıl önce
ebeveyn
işleme
c5bbe5fb9a
2 değiştirilmiş dosya ile 15 ekleme ve 13 silme
  1. 14 12
      src/jy/extract/extract.go
  2. 1 1
      src/res/fieldscore.json

+ 14 - 12
src/jy/extract/extract.go

@@ -26,12 +26,12 @@ import (
 var (
 	lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
 
-	cut     = ju.NewCut()                          //获取正文并清理
-	ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask          //任务列表
-	ClearTaskList map[string]*ClearTask            //清理任务列表
-	saveLimit     = 100                            //抽取日志批量保存
-	PageSize      = 5000                           //查询分页
+	cut           = ju.NewCut()                          //获取正文并清理
+	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList      map[string]*ExtractTask                //任务列表
+	ClearTaskList map[string]*ClearTask                  //清理任务列表
+	saveLimit     = 100                                  //抽取日志批量保存
+	PageSize      = 5000                                 //查询分页
 	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -290,11 +290,12 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		subtype = "all"
 	}
 	toMap := qu.ObjToMap(doc["jsondata"])
-	if toMap != nil {
-		if (*toMap)["extweight"] == nil {
-			(*toMap)["extweight"] = ju.Config["jsondata_extweight"]
-		}
-	}
+	//	log.Debug("toMap", toMap)
+	//	if toMap != nil {
+	//		if (*toMap)["extweight"] == nil {
+	//			(*toMap)["extweight"] = ju.Config["jsondata_extweight"]
+	//		}
+	//	}
 	j = &ju.Job{
 		SourceMid:      qu.BsonIdToSId(doc["_id"]),
 		Category:       toptype,
@@ -1104,6 +1105,7 @@ func extractFromKv(field, fieldname string, blocks []*ju.Block, vc *RuleCore, kv
 						"blocktag":    bl.Classify,
 						"weight":      vv.Weight,
 					})
+					break //暂定取第一个
 				}
 			}
 		}
@@ -1870,7 +1872,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 { //reids未找到,执行规则匹配
+	if i == 0 {                            //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库

+ 1 - 1
src/res/fieldscore.json

@@ -200,7 +200,7 @@
         "negativewords": [
             {
                 "describe": "包含负分",
-                "regstr": "(附件|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
+                "regstr": "(附件|否决原因|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
                 "score": -10
             },
 			{