Browse Source

块标签空指针处理

fengweiqiang 6 năm trước cách đây
mục cha
commit
bd3aa85e5c
1 tập tin đã thay đổi với 35 bổ sung và 29 xóa
  1. 35 29
      src/jy/extract/extract.go

+ 35 - 29
src/jy/extract/extract.go

@@ -22,13 +22,13 @@ import (
 )
 
 var (
-	lock    sync.RWMutex
-	cut     = ju.NewCut()                          //获取正文并清理
-	ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask          //任务列表
-	ClearTaskList map[string]*ClearTask            //清理任务列表
-	saveLimit     = 200                            //抽取日志批量保存
-	PageSize      = 5000                           //查询分页
+	lock          sync.RWMutex
+	cut           = ju.NewCut()                          //获取正文并清理
+	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList      map[string]*ExtractTask                //任务列表
+	ClearTaskList map[string]*ClearTask                  //清理任务列表
+	saveLimit     = 200                                  //抽取日志批量保存
+	PageSize      = 5000                                 //查询分页
 	Fields        = `{"title":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -932,9 +932,9 @@ func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[stri
 			if j.Result[v.Field] == nil {
 				j.Result[v.Field] = [](*ju.ExtField){}
 			}
-			if tmp["blocktag"] != nil{
+			if tmp["blocktag"] != nil {
 				j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{tmp["blocktag"].(map[string]bool), v.Field, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val, 0})
-			}else {
+			} else {
 				j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{nil, v.Field, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val, 0})
 			}
 		}
@@ -956,7 +956,12 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 			if tmps, ok := v.([]map[string]interface{}); ok {
 				j.Result[k] = [](*ju.ExtField){}
 				for _, tmp := range tmps {
-					j.Result[k] = append(j.Result[k], &ju.ExtField{tmp["blocktag"].(map[string]bool), k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"], 0})
+					if tmp["blocktag"] != nil {
+						j.Result[k] = append(j.Result[k], &ju.ExtField{tmp["blocktag"].(map[string]bool), k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"], 0})
+					} else {
+						j.Result[k] = append(j.Result[k], &ju.ExtField{nil, k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"], 0})
+					}
+					//j.Result[k] = append(j.Result[k], &ju.ExtField{tmp["blocktag"].(map[string]bool), k, qu.ObjToString(tmp["code"]), qu.ObjToString(tmp["ruletext"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"], 0})
 				}
 			}
 		}
@@ -1188,15 +1193,15 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		if ju.Config["saveblock"].(bool) {
 			blocks := make([]ju.BlockAndTag, 0)
 			for _, v := range j.Block {
-				xx,_:=json.Marshal(v)
+				xx, _ := json.Marshal(v)
 				tmpblock := new(ju.TmpBlock)
-				err:= json.Unmarshal(xx,&tmpblock)
-				if err != nil{
-					if v.BPackage!= nil{
+				err := json.Unmarshal(xx, &tmpblock)
+				if err != nil {
+					if v.BPackage != nil {
 						bpb, _ := json.Marshal(v.BPackage)
 						tmpblock.BPackage = string(bpb)
 					}
-					tmpblock = rangeBlockToJson(v,*tmpblock)
+					tmpblock = rangeBlockToJson(v, *tmpblock)
 				}
 				blocks = append(blocks, ju.BlockAndTag{v.Tag, tmpblock})
 			}
@@ -1248,32 +1253,33 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		log.Debug("AnalysisSaveResult err", err)
 	})
 }
-func rangeBlockToJson(j *ju.Block,tmpblock ju.TmpBlock)(b *ju.TmpBlock){
-	if j == nil{
+func rangeBlockToJson(j *ju.Block, tmpblock ju.TmpBlock) (b *ju.TmpBlock) {
+	if j == nil {
 		return nil
 	}
-	if len(j.Block)>0{
-		for i,v := range j.Block{
+	if len(j.Block) > 0 {
+		for i, v := range j.Block {
 			rangetmp := new(ju.TmpBlock)
-			vb,_:=json.Marshal(v)
-			json.Unmarshal(vb,&rangetmp)
-			tmpblock.Block[i]=rangeBlockToJson(v,*rangetmp)
+			vb, _ := json.Marshal(v)
+			json.Unmarshal(vb, &rangetmp)
+			tmpblock.Block[i] = rangeBlockToJson(v, *rangetmp)
 		}
 	}
-	if j.ColonKV!= nil {
-		cb,_ := json.Marshal(j.ColonKV)
+	if j.ColonKV != nil {
+		cb, _ := json.Marshal(j.ColonKV)
 		tmpblock.ColonKV = string(cb)
 	}
-	if j.SpaceKV != nil{
-		sb,_ := json.Marshal(j.SpaceKV)
+	if j.SpaceKV != nil {
+		sb, _ := json.Marshal(j.SpaceKV)
 		tmpblock.SpaceKV = string(sb)
 	}
-	if j.TableKV != nil{
-		tb,_ := json.Marshal(j.TableKV)
+	if j.TableKV != nil {
+		tb, _ := json.Marshal(j.TableKV)
 		tmpblock.TableKV = string(tb)
 	}
 	return &tmpblock
 }
+
 //去重冗余字段
 func delFiled(k string) bool {
 	return k == "detail" || k == "contenthtml" || k == "site" || k == "spidercode" || k == "projectinfo" || k == "jsondata"
@@ -1368,7 +1374,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 { //reids未找到,执行规则匹配
+	if i == 0 {                            //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库