|
@@ -10,7 +10,7 @@ import (
|
|
|
ju "jy/util"
|
|
|
"log"
|
|
|
qu "qfw/util"
|
|
|
- redis "qfw/util/redis"
|
|
|
+ "qfw/util/redis"
|
|
|
"reflect"
|
|
|
"regexp"
|
|
|
"strconv"
|
|
@@ -22,13 +22,13 @@ import (
|
|
|
)
|
|
|
|
|
|
var (
|
|
|
- lock sync.RWMutex
|
|
|
- cut = ju.NewCut() // fetches the body text and cleans it
|
|
|
- ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs
|
|
|
- TaskList map[string]*ExtractTask // task list
|
|
|
- ClearTaskList map[string]*ClearTask // cleanup task list
|
|
|
- saveLimit = 200 // batch size for saving extraction logs
|
|
|
- PageSize = 5000 // query page size
|
|
|
+ lock sync.RWMutex
|
|
|
+ cut = ju.NewCut() // fetches the body text and cleans it
|
|
|
+ ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs
|
|
|
+ TaskList map[string]*ExtractTask // task list
|
|
|
+ ClearTaskList map[string]*ClearTask // cleanup task list
|
|
|
+ saveLimit = 200 // batch size for saving extraction logs
|
|
|
+ PageSize = 5000 // query page size
|
|
|
Fields = `{"title":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1}`
|
|
|
Fields2 = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
|
|
|
)
|
|
@@ -59,7 +59,8 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
|
|
|
|
|
|
//品牌抽取是否开启
|
|
|
ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)
|
|
|
-
|
|
|
+ //附件抽取是否开启
|
|
|
+ ext.InitFile()
|
|
|
return RunExtractTestTask(ext, startId, num)
|
|
|
}
|
|
|
|
|
@@ -80,12 +81,10 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
|
|
|
continue
|
|
|
}
|
|
|
var j, jf *ju.Job
|
|
|
- if ext.IsFileField{
|
|
|
- if v["projectinfo"] != nil {
|
|
|
+ if ext.IsFileField &&v["projectinfo"] != nil {
|
|
|
v["isextFile"] = true
|
|
|
j, jf = PreInfo(v)
|
|
|
- }
|
|
|
- }else {
|
|
|
+ } else {
|
|
|
j, _ = PreInfo(v)
|
|
|
}
|
|
|
ext.TaskInfo.ProcessPool <- true
|
|
@@ -131,6 +130,8 @@ func StartExtractTaskId(taskId string) bool {
|
|
|
|
|
|
//品牌抽取是否开启
|
|
|
ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)
|
|
|
+ //附件抽取是否开启
|
|
|
+ ext.InitFile()
|
|
|
|
|
|
ext.IsRun = true
|
|
|
go ext.ResultSave()
|
|
@@ -181,12 +182,10 @@ func RunExtractTask(taskId string) {
|
|
|
break
|
|
|
}
|
|
|
var j, jf *ju.Job
|
|
|
- if ext.IsFileField{
|
|
|
- if v["projectinfo"] != nil {
|
|
|
- v["isextFile"] = true
|
|
|
- j, jf = PreInfo(v)
|
|
|
- }
|
|
|
- }else {
|
|
|
+ if ext.IsFileField && v["projectinfo"] != nil {
|
|
|
+ v["isextFile"] = true
|
|
|
+ j, jf = PreInfo(v)
|
|
|
+ } else {
|
|
|
j, _ = PreInfo(v)
|
|
|
}
|
|
|
ext.TaskInfo.ProcessPool <- true
|
|
@@ -207,7 +206,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
|
|
|
defer qu.Catch()
|
|
|
//判断是否有附件这个字段
|
|
|
var isextFile bool
|
|
|
- if doc["isextFile"] != nil{
|
|
|
+ if doc["isextFile"] != nil {
|
|
|
isextFile = doc["isextFile"].(bool)
|
|
|
}
|
|
|
detail := ""
|
|
@@ -223,7 +222,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
|
|
|
doc["detail"] = detail
|
|
|
|
|
|
if isextFile {
|
|
|
- file2text(&doc) //附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
|
|
|
+ file2text(&doc) //附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
|
|
|
}
|
|
|
toptype := qu.ObjToString(doc["toptype"])
|
|
|
if qu.ObjToString(doc["type"]) == "bid" {
|
|
@@ -258,6 +257,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
|
|
|
Province: qu.ObjToString(doc["area"]),
|
|
|
Result: map[string][]*ju.ExtField{},
|
|
|
BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
|
|
|
+ IsFile: isextFile,
|
|
|
}
|
|
|
}
|
|
|
qu.Try(func() {
|
|
@@ -298,26 +298,28 @@ func file2text(doc *map[string]interface{}) {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- if utf8.RuneCountInString(strfileinfo.String()) < qu.IntAllDef(ju.Config["filelength"],100000 ){
|
|
|
+ if utf8.RuneCountInString(strfileinfo.String()) < qu.IntAllDef(ju.Config["filelength"], 100000) {
|
|
|
(*doc)["detailfile"] = strfileinfo.String() //附件文本堆一起(后期可以考虑,分开处理)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// ExtractProcess extracts from the document body via ExtractDetail(j), extracts from the attachment via ExtractFile(jf) when jf is non-nil and jf.IsFile is set, hands both jobs to AnalysisSaveResult, and finally receives one value from e.TaskInfo.ProcessPool.
|
|
|
func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
|
|
|
+ e.ExtractDetail(j)
|
|
|
+ if jf !=nil &&jf.IsFile {
|
|
|
+ e.ExtractFile(jf)
|
|
|
+ }
|
|
|
+ // Analyse the extraction results and save them (marked "todo" in the original comment; the outstanding work is not stated).
|
|
|
+ AnalysisSaveResult(j, jf, e)
|
|
|
+ <-e.TaskInfo.ProcessPool // pairs with the "ProcessPool <- true" send the task runners do before dispatching a job; presumably this frees that slot — confirm
|
|
|
+}
|
|
|
+
|
|
|
+func (e *ExtractTask) ExtractDetail(j *ju.Job) {
|
|
|
qu.Try(func() {
|
|
|
doc := *j.Data
|
|
|
- docfile := make(map[string]interface{})
|
|
|
- if jf != nil{
|
|
|
- docfile = *jf.Data
|
|
|
- docfile["dockey"]= "detailfile"
|
|
|
- }
|
|
|
//全局前置规则,结果覆盖doc属性
|
|
|
for _, v := range e.RulePres {
|
|
|
doc = ExtRegPre(doc, j, v, e.TaskInfo)
|
|
|
- if jf != nil{
|
|
|
- docfile = ExtRegPre(docfile, jf, v, e.TaskInfo)
|
|
|
- }
|
|
|
}
|
|
|
//抽取规则
|
|
|
for _, vc := range e.RuleCores {
|
|
@@ -351,46 +353,106 @@ func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
|
|
|
}
|
|
|
//log.Println("抽取-后置规则", tmp)
|
|
|
}
|
|
|
- //抽取规则-附件
|
|
|
- if jf != nil{
|
|
|
- for _, vc := range e.RuleCores {
|
|
|
- tmp := ju.DeepCopy(docfile).(map[string]interface{})
|
|
|
- //是否进入逻辑
|
|
|
- if !ju.Logic(vc.LuaLogic, tmp) {
|
|
|
- continue
|
|
|
- }
|
|
|
- //抽取-前置规则
|
|
|
- for _, v := range vc.RulePres {
|
|
|
- tmp = ExtRegPre(tmp, jf, v, e.TaskInfo)
|
|
|
- }
|
|
|
- //log.Println("抽取-前置规则", tmp)
|
|
|
|
|
|
- //抽取-规则
|
|
|
- for _, v := range vc.RuleCores {
|
|
|
- ExtRegCore(vc.ExtFrom, tmp, jf, v, e)
|
|
|
+ //全局后置规则
|
|
|
+ for _, v := range e.RuleBacks {
|
|
|
+ ExtRegBack(j, v, e.TaskInfo)
|
|
|
+ }
|
|
|
+ //候选人加入
|
|
|
+ if len(j.Winnerorder) > 0 {
|
|
|
+ winner := &ju.ExtField{
|
|
|
+ Field: "winner",
|
|
|
+ Code: "",
|
|
|
+ RuleText: "",
|
|
|
+ Type: "winnerorder",
|
|
|
+ MatchType: "winnerorder",
|
|
|
+ ExtFrom: "",
|
|
|
+ Value: j.Winnerorder[0]["entname"],
|
|
|
+ Score: 0,
|
|
|
+ }
|
|
|
+ if len([]rune(qu.ObjToString(j.Winnerorder[0]["entname"]))) < 4 {
|
|
|
+ winner.Score = -5
|
|
|
+ }
|
|
|
+ winners := j.Result["winner"]
|
|
|
+ if winners != nil {
|
|
|
+ winners = append(winners, winner)
|
|
|
+ } else {
|
|
|
+ winners = []*ju.ExtField{}
|
|
|
+ winners = append(winners, winner)
|
|
|
+ }
|
|
|
+ j.Result["winner"] = winners
|
|
|
+ }
|
|
|
+ //函数清理
|
|
|
+ for key, val := range j.Result {
|
|
|
+ for _, v := range val {
|
|
|
+ lock.Lock()
|
|
|
+ cfn := e.ClearFn[key]
|
|
|
+ lock.Unlock()
|
|
|
+ data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
|
|
|
+ v.Value = data[0]
|
|
|
+ //清理特殊符号
|
|
|
+ lock.Lock()
|
|
|
+ if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
|
|
|
+ clear.MesField[key] != nil {
|
|
|
+ text := qu.ObjToString(v.Value)
|
|
|
+ text = clear.OtherClean(key, text)
|
|
|
+ v.Value = text
|
|
|
}
|
|
|
- //log.Println("抽取-规则", tmp)
|
|
|
+ lock.Unlock()
|
|
|
+ }
|
|
|
+ }
|
|
|
+ PackageDetail(j, e) //处理分包信息
|
|
|
+ // bs, _ := json.Marshal(j.Result)
|
|
|
+ // log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
|
|
|
+ }, func(err interface{}) {
|
|
|
+ log.Println("ExtractProcess err", err)
|
|
|
+ })
|
|
|
+}
|
|
|
+func (e *ExtractTask) ExtractFile(j *ju.Job) {
|
|
|
+ qu.Try(func() {
|
|
|
+ doc := *j.Data
|
|
|
+ //全局前置规则,结果覆盖doc属性
|
|
|
+ for _, v := range e.RulePres {
|
|
|
+ if e.FileFields[v.Field] > 0 {
|
|
|
+ doc = ExtRegPre(doc, j, v, e.TaskInfo)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //抽取规则
|
|
|
+ for _, vc := range e.RuleCores {
|
|
|
+ tmp := ju.DeepCopy(doc).(map[string]interface{})
|
|
|
+ //是否进入逻辑
|
|
|
+ if !ju.Logic(vc.LuaLogic, tmp) {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ //抽取-前置规则
|
|
|
+ for _, v := range vc.RulePres {
|
|
|
+ if e.FileFields[vc.Field] > 0 {
|
|
|
+ tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //log.Println("抽取-前置规则", tmp)
|
|
|
|
|
|
- //项目名称未能抽取到,标题来凑
|
|
|
- if vc.Field == "projectname" {
|
|
|
- if len(jf.Result[vc.Field]) < 1 {
|
|
|
- jf.Result[vc.Field] = append(jf.Result[vc.Field], &ju.ExtField{vc.Field, "title", "title", "regexp", "title", vc.ExtFrom, jf.Title, 0})
|
|
|
- }
|
|
|
+ //抽取-规则
|
|
|
+ for _, v := range vc.RuleCores {
|
|
|
+ if e.FileFields[vc.Field] > 0 {
|
|
|
+ ExtRegCore(vc.ExtFrom, tmp, j, v, e)
|
|
|
}
|
|
|
+ }
|
|
|
+ //log.Println("抽取-规则", tmp)
|
|
|
|
|
|
- //抽取-后置规则
|
|
|
- for _, v := range vc.RuleBacks {
|
|
|
- ExtRegBack(jf, v, e.TaskInfo)
|
|
|
+ //抽取-后置规则
|
|
|
+ for _, v := range vc.RuleBacks {
|
|
|
+ if e.FileFields[vc.Field] > 0 {
|
|
|
+ ExtRegBack(j, v, e.TaskInfo)
|
|
|
}
|
|
|
- //log.Println("抽取-后置规则", tmp)
|
|
|
}
|
|
|
+ //log.Println("抽取-后置规则", tmp)
|
|
|
}
|
|
|
|
|
|
//全局后置规则
|
|
|
for _, v := range e.RuleBacks {
|
|
|
- ExtRegBack(j, v, e.TaskInfo)
|
|
|
- if jf != nil {
|
|
|
- ExtRegBack(jf, v, e.TaskInfo)
|
|
|
+ if e.FileFields[v.Field] > 0 {
|
|
|
+ ExtRegBack(j, v, e.TaskInfo)
|
|
|
}
|
|
|
}
|
|
|
//候选人加入
|
|
@@ -417,32 +479,6 @@ func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
|
|
|
}
|
|
|
j.Result["winner"] = winners
|
|
|
}
|
|
|
- //候选人加入-附件
|
|
|
- if jf != nil{
|
|
|
- if len(jf.Winnerorder) > 0 {
|
|
|
- winner := &ju.ExtField{
|
|
|
- Field: "winner",
|
|
|
- Code: "",
|
|
|
- RuleText: "",
|
|
|
- Type: "winnerorder",
|
|
|
- MatchType: "winnerorder",
|
|
|
- ExtFrom: "",
|
|
|
- Value: jf.Winnerorder[0]["entname"],
|
|
|
- Score: 0,
|
|
|
- }
|
|
|
- if len([]rune(qu.ObjToString(jf.Winnerorder[0]["entname"]))) < 4 {
|
|
|
- winner.Score = -5
|
|
|
- }
|
|
|
- winners := jf.Result["winner"]
|
|
|
- if winners != nil {
|
|
|
- winners = append(winners, winner)
|
|
|
- } else {
|
|
|
- winners = []*ju.ExtField{}
|
|
|
- winners = append(winners, winner)
|
|
|
- }
|
|
|
- jf.Result["winner"] = winners
|
|
|
- }
|
|
|
- }
|
|
|
//函数清理
|
|
|
for key, val := range j.Result {
|
|
|
for _, v := range val {
|
|
@@ -462,42 +498,13 @@ func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
|
|
|
lock.Unlock()
|
|
|
}
|
|
|
}
|
|
|
- //函数清理-附件
|
|
|
- if jf != nil{
|
|
|
- for key, val := range jf.Result {
|
|
|
- for _, v := range val {
|
|
|
- lock.Lock()
|
|
|
- cfn := e.ClearFn[key]
|
|
|
- lock.Unlock()
|
|
|
- data := clear.DoClearFn(cfn, []interface{}{v.Value, jf.Content})
|
|
|
- v.Value = data[0]
|
|
|
- //清理特殊符号
|
|
|
- lock.Lock()
|
|
|
- if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
|
|
|
- clear.MesField[key] != nil {
|
|
|
- text := qu.ObjToString(v.Value)
|
|
|
- text = clear.OtherClean(key, text)
|
|
|
- v.Value = text
|
|
|
- }
|
|
|
- lock.Unlock()
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+
|
|
|
PackageDetail(j, e) //处理分包信息
|
|
|
- if jf != nil{
|
|
|
- PackageDetail(jf, e) //处理分包信息-附件
|
|
|
- }
|
|
|
// bs, _ := json.Marshal(j.Result)
|
|
|
// log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
|
|
|
- //分析抽取结果并保存 todo
|
|
|
- AnalysisSaveResult(j, e)
|
|
|
- if jf != nil{
|
|
|
- AnalysisSaveResult(jf, e) //分析抽取结果并保存-附件
|
|
|
- }
|
|
|
}, func(err interface{}) {
|
|
|
log.Println("ExtractProcess err", err)
|
|
|
})
|
|
|
- <-e.TaskInfo.ProcessPool
|
|
|
}
|
|
|
|
|
|
//前置过滤
|
|
@@ -517,9 +524,9 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
|
|
|
AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志
|
|
|
} else {
|
|
|
var key string
|
|
|
- if doc["dockey"]== nil{
|
|
|
+ if !j.IsFile {
|
|
|
key = qu.If(in.Field == "", "detail", in.Field).(string)
|
|
|
- }else {
|
|
|
+ } else {
|
|
|
key = qu.If(in.Field == "", "detailfile", in.Field).(string)
|
|
|
}
|
|
|
text := qu.ObjToString(doc[key])
|
|
@@ -967,54 +974,9 @@ type FieldValue struct {
|
|
|
}
|
|
|
|
|
|
//分析抽取结果并保存
|
|
|
-func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
|
|
|
+func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
qu.Try(func() {
|
|
|
- doc := j.Data
|
|
|
- result := j.Result
|
|
|
- _id := qu.BsonIdToSId((*doc)["_id"])
|
|
|
- iscore, _ := ju.Config["fieldscore"].(bool)
|
|
|
- if iscore { //打分
|
|
|
- result = ScoreFields(j)
|
|
|
- }
|
|
|
- //结果排序
|
|
|
- values := map[string][]*ju.SortObject{}
|
|
|
- for key, val := range result {
|
|
|
- fieldValue := map[string][]interface{}{}
|
|
|
- if iscore { //走打分
|
|
|
- for _, v := range val {
|
|
|
- if len(fmt.Sprint(v.Value)) < 1 {
|
|
|
- continue //去除空串
|
|
|
- }
|
|
|
- fieldValue[fmt.Sprint(v.Value)+v.Type] = []interface{}{v.Score, v.Value}
|
|
|
- }
|
|
|
- } else { //不走打分,按出现频次
|
|
|
- for _, v := range val {
|
|
|
- if len(fmt.Sprint(v.Value)) < 1 {
|
|
|
- continue //去除空串
|
|
|
- }
|
|
|
- if fieldValue[fmt.Sprint(v.Value)] == nil {
|
|
|
- fieldValue[fmt.Sprint(v.Value)] = []interface{}{0, v.Value}
|
|
|
- } else {
|
|
|
- fieldValue[fmt.Sprint(v.Value)][0] = qu.IntAll(fieldValue[fmt.Sprint(v.Value)][0]) + 1
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- objects := []*ju.SortObject{}
|
|
|
- for k, v := range fieldValue {
|
|
|
- ValueStr := "" //第二排序
|
|
|
- if reflect.TypeOf(v[1]).String() == "string" {
|
|
|
- ValueStr = qu.ObjToString(v[1])
|
|
|
- }
|
|
|
- tmp := &ju.SortObject{
|
|
|
- Key: k,
|
|
|
- Value: qu.IntAll(v[0]),
|
|
|
- Object: v[1],
|
|
|
- ValueStr: ValueStr,
|
|
|
- }
|
|
|
- objects = append(objects, tmp)
|
|
|
- }
|
|
|
- values[key] = ju.ExtSort(objects)
|
|
|
- }
|
|
|
+ doc, result, _id, values := funcAnalysis(j)
|
|
|
//从排序结果中取值
|
|
|
tmp := map[string]interface{}{} //抽取值
|
|
|
for key, val := range values {
|
|
@@ -1031,9 +993,31 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
|
|
|
if len(j.Winnerorder) > 0 { //候选人信息
|
|
|
tmp["winnerorder"] = j.Winnerorder
|
|
|
}
|
|
|
+ //处理附件
|
|
|
+ var resultf map[string][]*ju.ExtField
|
|
|
+ var filevalues map[string][]*ju.SortObject
|
|
|
+ if jf != nil {
|
|
|
+ _, resultf, _, filevalues = funcAnalysis(jf)
|
|
|
+ ffield := map[string]interface{}{}
|
|
|
+ for key, val := range filevalues {
|
|
|
+ for _, v := range val { //取第一个非负数
|
|
|
+ if v.Key != "" && v.Value > -1 {
|
|
|
+ ffield[key] = v.Object
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if len(jf.PackageInfo) > 0 { //分包信息
|
|
|
+ ffield["package"] = jf.PackageInfo
|
|
|
+ }
|
|
|
+ if len(jf.Winnerorder) > 0 { //候选人信息
|
|
|
+ ffield["winnerorder"] = jf.Winnerorder
|
|
|
+ }
|
|
|
+ tmp["ffield"] = ffield
|
|
|
+ }
|
|
|
for k, v := range *doc {
|
|
|
//去重冗余字段
|
|
|
- if k == "detail" || k == "contenthtml" || k == "site" || k == "spidercode" {
|
|
|
+ if delFiled(k) {
|
|
|
continue
|
|
|
}
|
|
|
if tmp[k] == nil {
|
|
@@ -1086,6 +1070,7 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
|
|
|
if b, ok := ju.Config["saveresult"].(bool); ok && b {
|
|
|
id := tmp["_id"]
|
|
|
tmp["result"] = result
|
|
|
+ tmp["resultf"] = resultf
|
|
|
delete(tmp, "_id")
|
|
|
tmparr := []map[string]interface{}{
|
|
|
map[string]interface{}{
|
|
@@ -1102,6 +1087,7 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
|
|
|
tmp["epackage"] = string(bs)
|
|
|
}
|
|
|
tmp["result"] = result
|
|
|
+ tmp["resultf"] = resultf
|
|
|
b := db.Mgo.Update(e.TaskInfo.TestColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false)
|
|
|
if !b {
|
|
|
log.Println(e.TaskInfo.TestColl, _id)
|
|
@@ -1112,6 +1098,62 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
|
|
|
})
|
|
|
}
|
|
|
|
|
|
+// delFiled reports whether k names one of the redundant document fields ("detail", "contenthtml", "site", "spidercode", "projectinfo"); AnalysisSaveResult skips those keys when copying the source document into the saved result.
|
|
|
+func delFiled(k string) bool {
|
|
|
+ return k == "detail" || k == "contenthtml" || k == "site" || k == "spidercode" || k == "projectinfo"
|
|
|
+}
|
|
|
+
|
|
|
+// funcAnalysis collects and ranks the candidate values extracted for every field of job j.
|
|
|
+// It returns j.Data, the result map (j.Result, or ScoreFields(j) when the "fieldscore" config flag is true), the string form of the document "_id", and, per field, the candidates after ju.ExtSort.
|
|
|
+// With scoring on, candidates are keyed by value+Type and carry their Score; otherwise they are keyed by value and carry a count that starts at 0 and grows by 1 on every repeat.
|
|
|
+// A panic inside the function is handed to the deferred qu.Catch; presumably it is recovered there, in which case the zero values are returned — callers must not assume the returned pointer is non-nil.
|
|
|
+func funcAnalysis(j *ju.Job) (*map[string]interface{}, map[string][]*ju.ExtField, string, map[string][]*ju.SortObject) {
|
|
|
+ defer qu.Catch()
|
|
|
+ doc := j.Data
|
|
|
+ result := j.Result
|
|
|
+ _id := qu.BsonIdToSId((*doc)["_id"])
|
|
|
+ iscore, _ := ju.Config["fieldscore"].(bool)
|
|
|
+ if iscore { // scoring enabled
|
|
|
+ result = ScoreFields(j)
|
|
|
+ }
|
|
|
+ // Sort the results.
|
|
|
+ values := map[string][]*ju.SortObject{}
|
|
|
+ for key, val := range result {
|
|
|
+ fieldValue := map[string][]interface{}{}
|
|
|
+ if iscore { // scoring path: rank by score
|
|
|
+ for _, v := range val {
|
|
|
+ if len(fmt.Sprint(v.Value)) < 1 {
|
|
|
+ continue // drop empty strings
|
|
|
+ }
|
|
|
+ fieldValue[fmt.Sprint(v.Value)+v.Type] = []interface{}{v.Score, v.Value}
|
|
|
+ }
|
|
|
+ } else { // no scoring: rank by number of occurrences
|
|
|
+ for _, v := range val {
|
|
|
+ if len(fmt.Sprint(v.Value)) < 1 {
|
|
|
+ continue // drop empty strings
|
|
|
+ }
|
|
|
+ if fieldValue[fmt.Sprint(v.Value)] == nil {
|
|
|
+ fieldValue[fmt.Sprint(v.Value)] = []interface{}{0, v.Value}
|
|
|
+ } else {
|
|
|
+ fieldValue[fmt.Sprint(v.Value)][0] = qu.IntAll(fieldValue[fmt.Sprint(v.Value)][0]) + 1
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ objects := []*ju.SortObject{}
|
|
|
+ for k, v := range fieldValue {
|
|
|
+ ValueStr := "" // secondary sort key
|
|
|
+ if t := reflect.TypeOf(v[1]); t != nil && t.String() == "string" { // a nil Value passes the filter above (Sprint gives "<nil>") and TypeOf(nil) is a nil Type, so guard before calling String
|
|
|
+ ValueStr = qu.ObjToString(v[1])
|
|
|
+ }
|
|
|
+ tmp := &ju.SortObject{
|
|
|
+ Key: k,
|
|
|
+ Value: qu.IntAll(v[0]),
|
|
|
+ Object: v[1],
|
|
|
+ ValueStr: ValueStr,
|
|
|
+ }
|
|
|
+ objects = append(objects, tmp)
|
|
|
+ }
|
|
|
+ values[key] = ju.ExtSort(objects)
|
|
|
+ }
|
|
|
+ return doc, result, _id, values
|
|
|
+}
|
|
|
+
|
|
|
func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
|
|
|
defer qu.Catch()
|
|
|
//获取审核字段
|
|
@@ -1150,7 +1192,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
|
|
|
func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
|
|
|
defer qu.Catch()
|
|
|
i := redis.GetInt(field, field+"_"+fv) //查找redis
|
|
|
- if i == 0 { //reids未找到,执行规则匹配
|
|
|
+ if i == 0 { //reids未找到,执行规则匹配
|
|
|
val[field+"_isredis"] = false
|
|
|
e.RuleMatch(field, fv, val) //规则匹配
|
|
|
} else { //redis找到,打标识存库
|