Quellcode durchsuchen

Merge branch 'dev3.1' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.1

zhangjinkun vor 6 Jahren
Ursprung
Commit
aa7c3f22cb
3 geänderte Dateien mit 28 neuen und 1 gelöschten Zeilen
  1. 5 0
      src/jy/extract/exportask.go
  2. 6 1
      src/jy/extract/extract.go
  3. 17 0
      src/jy/extract/extractudp.go

+ 5 - 0
src/jy/extract/exportask.go

@@ -59,6 +59,11 @@ func extractAndExport(v string, t map[string]interface{}) {
 	limit := qu.IntAll(t["limit"])
 	list, _ := e.TaskInfo.FDB.Find(e.TaskInfo.FromColl, query, nil, Fields, false, 0, limit)
 	for _, v := range *list {
+		if v["check_sensitive2"] != nil { //去除含敏感词数据
+			if v["check_sensitive2"].(string) != "" {
+				continue
+			}
+		}
 		j := PreInfo(v)
 		e.TaskInfo.ProcessPool <- true
 		go e.ExtractProcess(j)

+ 6 - 1
src/jy/extract/extract.go

@@ -26,7 +26,7 @@ var (
 	TaskList  map[string]*ExtractTask                //任务列表
 	saveLimit = 200                                  //抽取日志批量保存
 	PageSize  = 5000                                 //查询分页
-	Fields    = `{"title":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1}`
+	Fields    = `{"title":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"check_sensitive2":1}`
 )
 
 //启动测试抽取
@@ -148,6 +148,11 @@ func RunExtractTask(taskId string) {
 		log.Printf("page=%d,query=%v", i+1, query)
 		list, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, limit)
 		for _, v := range *list {
+			if v["check_sensitive2"] != nil { //去除含敏感词数据
+				if v["check_sensitive2"].(string) != "" {
+					continue
+				}
+			}
 			//log.Println(v["_id"])
 			if !ext.IsRun {
 				break

+ 17 - 0
src/jy/extract/extractudp.go

@@ -98,6 +98,7 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 		ext.InitDFA()
 	}
 	//质量审核
+	ext.InitAuditFields()
 	ext.InitAuditRule()
 	ext.InitAuditClass()
 	ext.InitAuditRecogField()
@@ -135,6 +136,11 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 			if ext.TaskInfo.FDB.Count(ext.TaskInfo.FromColl, query) > 0 {
 				list, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, limit)
 				for _, v := range *list {
+					if v["check_sensitive2"] != nil { //去除含敏感词数据
+						if v["check_sensitive2"].(string) != "" {
+							continue
+						}
+					}
 					//log.Println(v["_id"])
 					j := PreInfo(v)
 					ext.TaskInfo.ProcessPool <- true
@@ -151,6 +157,11 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 			if ext.TaskInfo.FDB.Count(ext.TaskInfo.FromColl+"_back", queryback) > 0 {
 				list2, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl+"_back", queryback, nil, Fields, false, 0, limit)
 				for _, v := range *list2 {
+					if v["check_sensitive2"] != nil { //去除含敏感词数据
+						if v["check_sensitive2"].(string) != "" {
+							continue
+						}
+					}
 					//log.Println(v["_id"])
 					j := PreInfo(v)
 					ext.TaskInfo.ProcessPool <- true
@@ -181,12 +192,18 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 			log.Printf("page=%d,query=%v", i+1, query)
 			list, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, limit)
 			for _, v := range *list {
+				if v["check_sensitive2"] != nil { //去除含敏感词数据
+					if v["check_sensitive2"].(string) != "" {
+						continue
+					}
+				}
 				//log.Println(v["_id"])
 				j := PreInfo(v)
 				ext.TaskInfo.ProcessPool <- true
 				go ext.ExtractProcess(j)
 				sid = qu.BsonIdToSId(v["_id"])
 			}
+
 		}
 	}
 }