wcj 6 vuotta sitten
vanhempi
commit
c112c70013
3 muutettua tiedostoa, joissa 15 lisäystä ja 11 poistoa
  1. 10 8
      src/jy/extract/extract.go
  2. 2 3
      src/jy/extract/extractInit.go
  3. 3 0
      src/jy/pretreated/analytable.go

+ 10 - 8
src/jy/extract/extract.go

@@ -24,12 +24,12 @@ import (
 var (
 	lock, lockrule, lockclear sync.RWMutex
 
-	cut     = ju.NewCut()                          //获取正文并清理
-	ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask          //任务列表
-	ClearTaskList map[string]*ClearTask            //清理任务列表
-	saveLimit     = 200                            //抽取日志批量保存
-	PageSize      = 5000                           //查询分页
+	cut           = ju.NewCut()                          //获取正文并清理
+	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList      map[string]*ExtractTask                //任务列表
+	ClearTaskList map[string]*ClearTask                  //清理任务列表
+	saveLimit     = 200                                  //抽取日志批量保存
+	PageSize      = 5000                                 //查询分页
 	Fields        = `{"title":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -1153,7 +1153,9 @@ type FieldValue struct {
 func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 	qu.Try(func() {
 		doc, result, _id := funcAnalysis(j)
-		go otherNeedSave(j, result, e)
+		if isSaveTag, _ := ju.Config["isSaveTag"].(bool); isSaveTag {
+			go otherNeedSave(j, result, e)
+		}
 		auxinfo := auxInfo(j)
 		//从排序结果中取值
 		tmp := map[string]interface{}{} //抽取值
@@ -1499,7 +1501,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 { //reids未找到,执行规则匹配
+	if i == 0 {                            //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库

+ 2 - 3
src/jy/extract/extractInit.go

@@ -369,12 +369,11 @@ func (e *ExtractTask) InitRuleCore() {
 						tmp := strings.Split(rinfo.RuleText, "__")
 						var pattern string
 						if strings.Contains(tmp[0], "\\u") {
-							tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
 							tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
-							pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
 						} else {
 							pattern = tmp[0]
 						}
+						pattern, _ = strconv.Unquote(`"` + pattern + `"`)
 						if len(tmp) == 2 {
 							rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: tmp[1]}
 						} else {
@@ -414,12 +413,12 @@ func (e *ExtractTask) InitRuleCore() {
 						tmp := strings.Split(rinfo.RuleText, "__")
 						var pattern string
 						if strings.Contains(tmp[0], "\\u") {
-							tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
 							tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
 							pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
 						} else {
 							pattern = tmp[0]
 						}
+						pattern, _ = strconv.Unquote(`"` + pattern + `"`)
 						if len(tmp) == 2 {
 							epos := strings.Split(tmp[1], ",")
 							posm := map[string]int{}

+ 3 - 0
src/jy/pretreated/analytable.go

@@ -3,6 +3,7 @@ package pretreated
 import (
 	"fmt"
 	u "jy/util"
+	"log"
 	qutil "qfw/util"
 	"regexp"
 	"strings"
@@ -2717,6 +2718,8 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
 			}
 		}
 		td.SortKV.AddKey(myContactType+td_k, td_v)
+		log.Println(myContactType, td_k, td_v)
+		delete(td.SortKV.NotTagKey, td_k)
 	}
 }