|
@@ -11,6 +11,7 @@ import (
|
|
|
qu "qfw/util"
|
|
|
"qfw/util/redis"
|
|
|
"regexp"
|
|
|
+ "sort"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
"sync"
|
|
@@ -26,12 +27,12 @@ import (
|
|
|
var ( // package-level locks, task registries and batch/paging settings
|
|
|
lock, lockrule, lockclear, locktag, blocktag sync.RWMutex // package-level mutexes; lock is taken around e.ClearFn lookups
|
|
|
|
|
|
- cut = ju.NewCut() // extracts the main body text and cleans it
|
|
|
- ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs, grouped per task info
|
|
|
- TaskList map[string]*ExtractTask // extraction task list
|
|
|
- ClearTaskList map[string]*ClearTask // clear (cleanup) task list
|
|
|
- saveLimit = 100 // batch size used when saving extraction logs
|
|
|
- PageSize = 5000 // page size for paged queries
|
|
|
+ cut = ju.NewCut() // extracts the main body text and cleans it
|
|
|
+ ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs, grouped per task info
|
|
|
+ TaskList map[string]*ExtractTask // extraction task list
|
|
|
+ ClearTaskList map[string]*ClearTask // clear (cleanup) task list
|
|
|
+ saveLimit = 100 // batch size used when saving extraction logs
|
|
|
+ PageSize = 5000 // page size for paged queries
|
|
|
Fields = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1}` // NOTE(review): looks like a field projection for document queries — confirm against callers
|
|
|
Fields2 = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}` // NOTE(review): presumably a narrower projection (amounts, title, project name, winner) — confirm against callers
|
|
|
)
|
|
@@ -318,6 +319,9 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
|
|
|
BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
|
|
|
RuleBlock: e.RuleBlock,
|
|
|
}
|
|
|
+ if (j.Jsondata != nil||(*j.Jsondata) != nil) && (*j.Jsondata)["jsoncontent"]!= nil{
|
|
|
+ delete((*j.Jsondata),"jsoncontent")
|
|
|
+ }
|
|
|
if isextFile {
|
|
|
jf = &ju.Job{
|
|
|
SourceMid: qu.BsonIdToSId(doc["_id"]),
|
|
@@ -335,22 +339,31 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
|
|
|
RuleBlock: e.RuleBlock,
|
|
|
IsFile: isextFile,
|
|
|
}
|
|
|
+ if (jf.Jsondata != nil||(*jf.Jsondata) != nil) && (*jf.Jsondata)["jsoncontent"]!= nil{
|
|
|
+ delete((*jf.Jsondata),"jsoncontent")
|
|
|
+ }
|
|
|
}
|
|
|
- //是否配置站点
|
|
|
codeSite := j.SpiderCode
|
|
|
- exp, isSite := e.Luacodes.Load(codeSite)
|
|
|
+ //是否启用站点
|
|
|
+ if value, ok := e.SiteMerge.Load(codeSite); ok {
|
|
|
+ isSite = value.(bool)
|
|
|
+ }
|
|
|
if isSite {
|
|
|
- if exp.(map[string]interface{})["e.SiteClearFn"] != nil {
|
|
|
- e.SiteClearFn = exp.(map[string]interface{})["e.SiteClearFn"].(map[string][]string)
|
|
|
- }
|
|
|
- if exp.(map[string]interface{})["e.SiteTag"] != nil {
|
|
|
- e.SiteTag = exp.(map[string]interface{})["e.SiteTag"].(map[string][]*Tag)
|
|
|
- }
|
|
|
- if exp.(map[string]interface{})["e.SiteRuleCores"] != nil {
|
|
|
- e.SiteRuleCores = exp.(map[string]interface{})["e.SiteRuleCores"].(map[string]map[string][]*RuleCore)
|
|
|
- }
|
|
|
- if exp.(map[string]interface{})["e.SiteRuleBacks"] != nil {
|
|
|
- e.SiteRuleBacks = exp.(map[string]interface{})["e.SiteRuleBacks"].([]*RegLuaInfo)
|
|
|
+ //是否配置站点
|
|
|
+ exp, isSite := e.Luacodes.Load(codeSite)
|
|
|
+ if isSite {
|
|
|
+ if exp.(map[string]interface{})["e.SiteClearFn"] != nil {
|
|
|
+ e.SiteClearFn = exp.(map[string]interface{})["e.SiteClearFn"].(map[string][]string)
|
|
|
+ }
|
|
|
+ if exp.(map[string]interface{})["e.SiteTag"] != nil {
|
|
|
+ e.SiteTag = exp.(map[string]interface{})["e.SiteTag"].(map[string][]*Tag)
|
|
|
+ }
|
|
|
+ if exp.(map[string]interface{})["e.SiteRuleCores"] != nil {
|
|
|
+ e.SiteRuleCores = exp.(map[string]interface{})["e.SiteRuleCores"].(map[string]map[string][]*RuleCore)
|
|
|
+ }
|
|
|
+ if exp.(map[string]interface{})["e.SiteRuleBacks"] != nil {
|
|
|
+ e.SiteRuleBacks = exp.(map[string]interface{})["e.SiteRuleBacks"].([]*RegLuaInfo)
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
qu.Try(func() {
|
|
@@ -587,6 +600,7 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
|
|
|
if istrue, ok := data[len(data)-1].(bool); istrue && ok {
|
|
|
j.Result[key][i].IsTrue = true
|
|
|
} else {
|
|
|
+ j.Result[key][i].Value = data[0]
|
|
|
continue
|
|
|
}
|
|
|
}
|
|
@@ -654,6 +668,7 @@ func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool, codeSite string) {
|
|
|
if value, ok := e.FileFields.Load(vc.Field); ok && qu.IntAllDef(value, 1) > 0 {
|
|
|
ExtRuleCore(tmp, e, vc, j, isSite)
|
|
|
}
|
|
|
+
|
|
|
// log.Debug("抽取-规则", tmp)
|
|
|
|
|
|
//抽取-后置规则
|
|
@@ -757,9 +772,6 @@ func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju
|
|
|
for k, v := range []*ju.JobKv{bp.ColonKV, bp.SpaceKV, bp.TableKV} {
|
|
|
if k == 0 {
|
|
|
tp = "colon"
|
|
|
- // for _, vv := range v.Kvs {
|
|
|
- // qu.Debug(vv.Key, vv.Value)
|
|
|
- // }
|
|
|
} else if k == 1 {
|
|
|
tp = "space"
|
|
|
} else if k == 2 {
|
|
@@ -962,14 +974,20 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
|
|
|
cfn := e.ClearFn[in.Field]
|
|
|
lock.Unlock()
|
|
|
data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content})
|
|
|
- j.BlockPackage[k].Budget = qu.Float64All(data[0])
|
|
|
+ if data[len(data)-1].(bool){
|
|
|
+ j.BlockPackage[k].Budget = qu.Float64All(data[0])
|
|
|
+ j.BlockPackage[k].IsTrueBudget = true
|
|
|
+ }
|
|
|
break
|
|
|
} else if in.Field == "bidamount" && vbpkg.Bidamount <= 0 {
|
|
|
lock.Lock()
|
|
|
cfn := e.ClearFn[in.Field]
|
|
|
lock.Unlock()
|
|
|
data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content})
|
|
|
- j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
|
|
|
+ if data[len(data)-1].(bool){
|
|
|
+ j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
|
|
|
+ j.BlockPackage[k].IsTrueBidamount = true
|
|
|
+ }
|
|
|
break
|
|
|
} else if in.Field == "winner" {
|
|
|
if j.BlockPackage[k].Winner == "" {
|
|
@@ -1008,7 +1026,10 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
|
|
|
cfn := e.ClearFn[in.Field]
|
|
|
lock.Unlock()
|
|
|
data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
|
|
|
- j.BlockPackage[k].Budget = qu.Float64All(data[0])
|
|
|
+ if data[len(data)-1].(bool){
|
|
|
+ j.BlockPackage[k].Budget = qu.Float64All(data[0])
|
|
|
+ j.BlockPackage[k].IsTrueBudget = true
|
|
|
+ }
|
|
|
break
|
|
|
}
|
|
|
if in.Field == "bidamount" && vbpkg.Bidamount <= 0 {
|
|
@@ -1016,7 +1037,10 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
|
|
|
cfn := e.ClearFn[in.Field]
|
|
|
lock.Unlock()
|
|
|
data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
|
|
|
- j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
|
|
|
+ if data[len(data)-1].(bool){
|
|
|
+ j.BlockPackage[k].Bidamount = qu.Float64All(data[0])
|
|
|
+ j.BlockPackage[k].IsTrueBidamount = true
|
|
|
+ }
|
|
|
break
|
|
|
} else if in.Field == "bidstatus" {
|
|
|
if j.BlockPackage[k].BidStatus == "" {
|
|
@@ -1047,13 +1071,14 @@ func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) (map[string][]ma
|
|
|
kvmap[vc.Field] = append(kvmap[vc.Field], map[string]interface{}{
|
|
|
"code": "winnerorder",
|
|
|
"field": vc.Field,
|
|
|
- "ruletext": "中标候选人",
|
|
|
+ "ruletext": "中标候选人_"+ v["sortstr"].(string),
|
|
|
"extfrom": v["sortstr"],
|
|
|
"sourcevalue": v["price"],
|
|
|
"value": v["price"],
|
|
|
"type": "winnerorder",
|
|
|
"matchtype": "winnerorder",
|
|
|
})
|
|
|
+ return kvmap, false
|
|
|
}
|
|
|
//候选人中标金额
|
|
|
if price := j.Winnerorder[0]["price"]; price != nil {
|
|
@@ -1126,6 +1151,14 @@ func extractFromKv(field, fieldname string, blocks []*ju.Block, vc *RuleCore, kv
|
|
|
// }
|
|
|
} else if k == 1 {
|
|
|
tp = "space"
|
|
|
+ // for _, vv := range v.Kvs {
|
|
|
+ // qu.Debug("space-kvs:", vv.Key, vv.Value)
|
|
|
+ // }
|
|
|
+ // for kkk, vv := range v.KvTags {
|
|
|
+ // for _, vvv := range vv {
|
|
|
+ // qu.Debug("space-tags", kkk, vvv.Key, vvv.Value)
|
|
|
+ // }
|
|
|
+ // }
|
|
|
} else if k == 2 {
|
|
|
tp = "table"
|
|
|
// for _, vv := range v.Kvs {
|
|
@@ -1563,6 +1596,7 @@ type FieldValue struct {
|
|
|
Value interface{}
|
|
|
Count int
|
|
|
}
|
|
|
+var clearWinnerReg =regexp.MustCompile("名称|施工|拟定供应商名称|:|:") // label words ("name", "construction", "proposed supplier name") and colons that are stripped from per-package winner strings before they are joined into s_winner
|
|
|
|
|
|
//分析抽取结果并保存
|
|
|
func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
@@ -1583,7 +1617,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
for _, val := range result {
|
|
|
for _, v := range val { //取第一个非负数,项目名称除外
|
|
|
//存0是否有效
|
|
|
- if v.Field == "bidamount" || v.Field == "budget" && v.IsTrue {
|
|
|
+ if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue {
|
|
|
tmp[v.Field] = v.Value
|
|
|
break
|
|
|
}
|
|
@@ -1598,22 +1632,70 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
}
|
|
|
if len(j.PackageInfo) > 0 { //分包信息
|
|
|
tmp["package"] = j.PackageInfo
|
|
|
+ //包预算,中标金额合并大于抽取就覆盖
|
|
|
var tmpBidamount, tmpBudget float64
|
|
|
+ //s_winner逗号分隔拼接,分包中标人
|
|
|
+ var tmpstr,savewinner []string
|
|
|
+ //按包排序
|
|
|
+ for b, v := range j.PackageInfo {
|
|
|
+ if v["winner"]!= nil && v["winner"]!=""{
|
|
|
+ tmpstr = append(tmpstr,b)
|
|
|
+ }
|
|
|
+ }
|
|
|
//包预算,中标金额合并大于抽取就覆盖
|
|
|
- for _, v := range j.PackageInfo {
|
|
|
- if v["budget"] != nil {
|
|
|
- tmpBudget += qu.Float64All(v["budget"])
|
|
|
+ if len(j.PackageInfo) >1{
|
|
|
+ //包数大于1累加
|
|
|
+ for _, v := range j.PackageInfo {
|
|
|
+ if v["budget"] != nil {
|
|
|
+ tmpBudget += qu.Float64All(v["budget"])
|
|
|
+ }
|
|
|
+ if v["bidamount"] != nil {
|
|
|
+ tmpBidamount += qu.Float64All(v["bidamount"])
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if qu.Float64All(tmp["budget"]) < tmpBudget {
|
|
|
+ tmp["budget"] = tmpBudget
|
|
|
+ }
|
|
|
+ if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
|
|
|
+ tmp["bidamount"] = tmpBidamount
|
|
|
+ }
|
|
|
+ }else {
|
|
|
+ //包数等于1,tmp没有值取包里的值
|
|
|
+ if tmp["budget"] == nil || tmp["budget"] == 0 {
|
|
|
+ for _,v := range j.PackageInfo {
|
|
|
+ if v["budget"] != nil {
|
|
|
+ tmp["budget"] = v["budget"]
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
}
|
|
|
- if v["bidamount"] != nil {
|
|
|
- tmpBidamount += qu.Float64All(v["bidamount"])
|
|
|
+ if tmp["bidamount"] == nil || tmp["bidamount"] == 0 {
|
|
|
+ for _,v := range j.PackageInfo {
|
|
|
+ if v["bidamount"] != nil {
|
|
|
+ tmp["bidamount"] = v["bidamount"]
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- if qu.Float64All(tmp["budget"]) < tmpBudget {
|
|
|
- tmp["budget"] = tmpBudget
|
|
|
+ //s_winner逗号分隔拼接,分包中标人
|
|
|
+ sort.Strings(tmpstr)
|
|
|
+ for _,v := range tmpstr{
|
|
|
+ svvvv := qu.ObjToString(j.PackageInfo[v]["winner"])
|
|
|
+ savevvv := clearWinnerReg.ReplaceAllString(svvvv, "")
|
|
|
+ if savevvv == ""{
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ savewinner = append(savewinner,savevvv)
|
|
|
}
|
|
|
- if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
|
|
|
- tmp["bidamount"] = tmpBidamount
|
|
|
+ if (savewinner == nil || len(savewinner)==0) && tmp["winner"]!=nil{
|
|
|
+ tmp["s_winner"] = tmp["winner"]
|
|
|
+ }else if savewinner != nil{
|
|
|
+ tmp["s_winner"] = strings.Join(savewinner,",")
|
|
|
}
|
|
|
+
|
|
|
+ }else if tmp["winner"]!= nil && tmp["winner"]!=""{
|
|
|
+ //没有分包取winner
|
|
|
+ tmp["s_winner"] = tmp["winner"]
|
|
|
}
|
|
|
if len(j.Winnerorder) > 0 { //候选人信息
|
|
|
tmp["winnerorder"] = j.Winnerorder
|
|
@@ -1721,6 +1803,10 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+ // fmt.Println("=============抽取结果================")
|
|
|
+ // for k, v := range tmp {
|
|
|
+ // qu.Debug(k, "---", v)
|
|
|
+ // }
|
|
|
//tmp["extract_content"] = j.Content
|
|
|
if e.TaskInfo.TestColl == "" {
|
|
|
if len(tmp) > 0 { //保存抽取结果
|
|
@@ -1765,13 +1851,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
e.RWMutex.Unlock()
|
|
|
}
|
|
|
} else { //测试结果
|
|
|
- // fmt.Println("=============抽取结果================")
|
|
|
- // for k, v := range tmp {
|
|
|
- // qu.Debug(k, "---", v)
|
|
|
- // }
|
|
|
- // for field, _ := range e.Fields {
|
|
|
- // qu.Debug(field, "---", tmp[field])
|
|
|
- // }
|
|
|
delete(tmp, "_id")
|
|
|
if len(j.BlockPackage) > 0 { //分包详情
|
|
|
bs, _ := json.Marshal(j.BlockPackage)
|
|
@@ -1967,7 +2046,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
|
|
|
func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
|
|
|
defer qu.Catch()
|
|
|
i := redis.GetInt(field, field+"_"+fv) //查找redis
|
|
|
- if i == 0 { //reids未找到,执行规则匹配
|
|
|
+ if i == 0 { //reids未找到,执行规则匹配
|
|
|
val[field+"_isredis"] = false
|
|
|
e.RuleMatch(field, fv, val) //规则匹配
|
|
|
} else { //redis找到,打标识存库
|
|
@@ -2059,7 +2138,10 @@ func resetWinnerorder(j *ju.Job) {
|
|
|
if maxlen > 0 {
|
|
|
winners = append(winners, &ju.ExtField{Code: "winnerorder", Field: "winner", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["entname"], Score: 0.5})
|
|
|
if j.Winnerorder[0]["price"] != nil {
|
|
|
- bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["price"], Score: 0.5})
|
|
|
+ tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"],""})
|
|
|
+ if tmpPrice[len(tmpPrice)-1].(bool){
|
|
|
+ bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder",SourceValue:j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 0.5})
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
if j.Result["winner"] == nil && len(winners) > 0 {
|