Parcourir la source

1.金额处理

fengweiqiang il y a 5 ans
Parent
commit
8038899d7c

+ 8 - 1
src/jy/clear/tonumber.go

@@ -98,10 +98,17 @@ func ObjToMoney(data []interface{}) []interface{} {
 			f = f * 10000
 		}
 	}
+	if f == 0 && !moneyUnitRegBool.MatchString(fmt.Sprint(data[0])) {
+		data = append(data, false)
+		return data
+	}
+	data = append(data, true)
 	data[0] = f
 	return data
 }
-
+// Optional field labels the pattern accepts in front of a zero amount: ["中标金额","成交金额","合同金额","中标价","成交价","成交价格","中标(成交)金额","投标报价","中标标价","成交结果"]
+// Zero-amount spellings: ["0元","零元","0.0万元","¥0元"]. ObjToMoney uses a match to flag a parsed 0 as valid (appends true) instead of rejecting it (appends false). NOTE(review): the pattern is unanchored and also accepts a bare "0", so any text containing the digit 0 matches; the "." in 0.0万元 is unescaped — confirm both are intended.
+var moneyUnitRegBool = regexp.MustCompile(`(中标金额|成交金额|合同金额|中标价|成交价|成交价格|中标\(成交\)金额|投标报价|中标标价|成交结果)?[::\s]?(0元|零元|0.0万元|¥0元|0)+`)
 //数字金额转换
 func numMoney(data []interface{}) ([]interface{}, bool) {
 	tmp := fmt.Sprintf("%f",data[0])

+ 17 - 2
src/jy/extract/extpackage.go

@@ -7,7 +7,7 @@ import (
 	"log"
 	qu "qfw/util"
 	"reflect"
-	"regexp"
+	regexp "regexp"
 	"sort"
 )
 
@@ -60,6 +60,13 @@ func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{},
 					cfn := e.ClearFn["budget"]
 					lock.Unlock()
 					data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
+					if data[0] ==0{
+						if istrue,ok:= data[len(data)-1].(bool);istrue&&ok{
+							(*sonJobResult)["budget"] = data[0]
+						}else {
+							continue
+						}
+					}
 					(*sonJobResult)["budget"] = data[0]
 					continue
 				}
@@ -68,10 +75,18 @@ func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{},
 					cfn := e.ClearFn["budget"]
 					lock.Unlock()
 					data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
+					if istrue,ok:= data[len(data)-1].(bool);istrue&&ok{
+						(*sonJobResult)["budget"] = data[0]
+					}else {
+						continue
+					}
 					(*sonJobResult)["bidamount"] = data[0]
 					continue
 				}
 				if ((*sonJobResult)["winner"] == nil || (*sonJobResult)["winner"] == "") && tags[0].Key == "中标单位" {
+					if winnerorderNotReg.MatchString(pv[0].Value){
+						continue
+					}
 					(*sonJobResult)["winner"] = pv[0].Value
 				}
 
@@ -82,7 +97,7 @@ func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{},
 		}
 	}
 }
-
+var winnerorderNotReg =regexp.MustCompile(`(附件|否决原因|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\d[\s]{0,10}(\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})`) // Deny-list for winner values: kvparse skips a "中标单位" value that matches this pattern. NOTE(review): the bare `?` alternative is not valid Go regexp syntax as shown (presumably a full-width question mark in the real file), and 候选 is listed twice — confirm.
 //处理分包信息
 func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 	qu.Try(func() {

+ 38 - 22
src/jy/extract/extract.go

@@ -26,12 +26,12 @@ import (
 var (
 	lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
 
-	cut           = ju.NewCut()                          //获取正文并清理
-	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask                //任务列表
-	ClearTaskList map[string]*ClearTask                  //清理任务列表
-	saveLimit     = 100                                  //抽取日志批量保存
-	PageSize      = 5000                                 //查询分页
+	cut     = ju.NewCut()                          // fetches the body text and cleans it
+	ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs
+	TaskList      map[string]*ExtractTask          // task list
+	ClearTaskList map[string]*ClearTask            // cleaning-task list
+	saveLimit     = 100                            // batch size for saving extraction logs
+	PageSize      = 5000                           // query page size
 	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -568,9 +568,8 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 			}
 		}
 		//函数清理
-
 		for key, val := range j.Result {
-			for _, v := range val {
+			for i, v := range val {
 				//qu.Debug(key, v.Value)
 				lockclear.Lock()
 				var cfn = []string{}
@@ -584,6 +583,13 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 					continue
 				}
 				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
+				if key == "budget" || key == "bidamount" {
+						if istrue, ok := data[len(data)-1].(bool); istrue && ok {
+							j.Result[key][i].IsTrue = true
+						} else {
+							continue
+						}
+				}
 				before, _ := v.Value.(string)
 				v.Value = data[0]
 				BeforeAddClearFnLog(v.Type, "函数清理", j.SourceMid, before, v.MatchType, v, e)
@@ -1576,6 +1582,11 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				} else if v.Field == "projectname" {
 					tmp[v.Field] = v.Value
 					break
+				} else if v.Field == "bidamount"||v.Field =="budget"{
+					if v.IsTrue{
+						tmp[v.Field] =v.Value
+						break
+					}
 				}
 			}
 		}
@@ -1691,19 +1702,19 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		//tmp["extract_content"] = j.Content
 		if e.TaskInfo.TestColl == "" {
 			if len(tmp) > 0 { //保存抽取结果
-			/*	if len(e.SiteFields) <= 0 {
-					//for field, _ := range e.Fields {
-					//	if tmp[field] == nil &&  {
-					//		tmp[field] = "" //覆盖之前版本数据
-					//	}
-					//}
-				} else {
-					//for field, _ := range e.SiteFields {
-					//	if tmp[field] == nil &&{
-					//		tmp[field] = "" //覆盖之前版本数据
-					//	}
-					//}
-				}*/
+				/*	if len(e.SiteFields) <= 0 {
+						//for field, _ := range e.Fields {
+						//	if tmp[field] == nil &&  {
+						//		tmp[field] = "" //覆盖之前版本数据
+						//	}
+						//}
+					} else {
+						//for field, _ := range e.SiteFields {
+						//	if tmp[field] == nil &&{
+						//		tmp[field] = "" //覆盖之前版本数据
+						//	}
+						//}
+					}*/
 				tmp["repeat"] = 0
 				tmparr := []map[string]interface{}{
 					map[string]interface{}{
@@ -1877,6 +1888,11 @@ func auxInfo(j *ju.Job) map[string][]map[string]interface{} {
 					standardized = true
 				}
 			}
+			if field == "budget"||field == "bidamount"{
+				if !v.IsTrue{
+					continue
+				}
+			}
 			sfield := map[string]interface{}{
 				"val":          v.Value,
 				"type":         v.Type,
@@ -1929,7 +1945,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 {                            //reids未找到,执行规则匹配
+	if i == 0 { //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库

+ 14 - 4
src/jy/extract/score.go

@@ -114,9 +114,19 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 		for tmpsindex, tmpsvalue := range tmps {
 			//没有抽取到值,不打分
 			if string_value := fmt.Sprint(tmpsvalue.Value); string_value == "" || string_value == "0" || string_value == "<nil>" {
-				tmps[tmpsindex].Score = -10
-				tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: `value结果为空直接-10分`, Code: field, Value: tmpsvalue.Value, Score: -10})
-				continue
+				if field == "budget" || field == "bidamount" {
+					if tmpsvalue.IsTrue {
+						//continue
+					}else {
+						tmps[tmpsindex].Score = -10
+						tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: `value结果为空直接-10分`, Code: field, Value: tmpsvalue.Value, Score: -10})
+						continue
+					}
+				}else {
+					tmps[tmpsindex].Score = -10
+					tmps[tmpsindex].ScoreItem = append(tmps[tmpsindex].ScoreItem, &ju.ScoreItem{Des: `value结果为空直接-10分`, Code: field, Value: tmpsvalue.Value, Score: -10})
+					continue
+				}
 			}
 			lockscore.Lock()
 			describe := qu.ObjToString(SoreConfig["extractype"]["describe"])
@@ -270,7 +280,7 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 				max := qu.IntAll(scoreRule["max"])
 				val := qu.IntAll(tmpsvalue.Value)
 				scores, _ := scoreRule["score"].([]interface{})
-				if len(scores) < 3 || val == 0 {
+				if len(scores) < 3 || val < 0 {
 					continue
 				}
 				if val < min && 0 < val {

+ 1 - 0
src/jy/util/article.go

@@ -55,6 +55,7 @@ type ExtField struct {
 	ScoreItem   []*ScoreItem      //打分项
 	Weight      int               //权重值
 	ValRepeat   int               //结果值重复次数,打分参考
+	IsTrue      bool              //针对金额0是否有效的值,其他字段不参考
 }
 
 //打分项

+ 0 - 1
src/main.go

@@ -16,7 +16,6 @@ import (
 	log "github.com/donnie4w/go-logger/logger"
 	"qfw/util/elastic"
 	"qfw/util/redis"
-
 )
 
 func init() {

+ 2 - 2
src/res/fieldscore.json

@@ -200,7 +200,7 @@
         "negativewords": [
             {
                 "describe": "包含负分",
-                "regstr": "(附件|否决原因|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
+                "regstr": "(附件|否决原因|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
                 "score": -10
             },
 			{
@@ -557,7 +557,7 @@
     "bidamount": {
         "type": "float",
         "describe": "min>val:1,min<=val<=max:3,max<val:1",
-        "min": 1000,
+        "min": -0.999,
         "max": 1000000000,
         "score": [
             -3,