zhangjinkun pred 6 rokmi
rodič
commit
342a628e3e
2 zmenené súbory, 10 pridaní a 5 odobraní
  1. 6 1
      src/jy/extract/extract.go
  2. 4 4
      src/res/isextract.json

+ 6 - 1
src/jy/extract/extract.go

@@ -185,6 +185,11 @@ func RunExtractTask(taskId string) {
 			if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
 				continue
 			}
+			//根据标题判断是否抽取
+			b := IsExtract("title", qu.ObjToString(v["title"]), "")
+			if !b {
+				continue
+			}
 			_id := qu.BsonIdToSId(v["_id"])
 			//log.Debug(_id)
 			if !ext.IsRun {
@@ -610,7 +615,7 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
 //抽取-规则
 func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) {
 	defer qu.Catch()
-	//废标、流标、ppp等跳过
+	//根据field配置项目,是否抽取。例如:废标、流标等跳过,
 	b := IsExtract(in.Field, j.Title, j.Content)
 	if !b {
 		return

+ 4 - 4
src/res/isextract.json

@@ -1,11 +1,11 @@
 {
     "bidamount": [
-        "(不足三家|废标流标|废标公告|流标公告|ppp项目|PPP项目)"
+        "(不足三家|废标流标|废标公告|流标公告)"
     ],
     "winner": [
-        "(不足三家|废标流标|废标公告|流标公告|ppp项目|PPP项目)"
+        "(不足三家|废标流标|废标公告|流标公告)"
     ],
-	"filter":[
-		"(?i)(PPP项目[::]否|非PPP项目)"
+	"title":[
+        "(?i)(ppp项目)"
 	]
 }