ソースを参照

Merge branch 'dev3.2' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.2

fengweiqiang 6 年 前
コミット
be44ee0886
2 ファイル変更、55 行追加、7 行削除
  1. 51 3
      src/jy/extract/extract.go
  2. 4 4
      src/res/isextract.json

+ 51 - 3
src/jy/extract/extract.go

@@ -185,6 +185,11 @@ func RunExtractTask(taskId string) {
 			if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
 				continue
 			}
+			//根据标题判断是否抽取
+			b := IsExtract("title", qu.ObjToString(v["title"]), "")
+			if !b {
+				continue
+			}
 			_id := qu.BsonIdToSId(v["_id"])
 			//log.Debug(_id)
 			if !ext.IsRun {
@@ -446,17 +451,22 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 				cfn := e.ClearFn[key]
 				lockclear.Unlock()
 				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
+				before, _ := v.Value.(string)
 				v.Value = data[0]
+				BeforeAddClearFnLog("clearcfn", "函数清理", j.SourceMid, before, "clear_cfn", v, e)
+				// The call above records the function-cleaning log entry.
 				//清理特殊符号
 				lockclear.Lock()
-				if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
-					clear.MesField[key] != nil {
+				if clear.AsyField[key] != nil || clear.SymField[key] != nil || clear.MesField[key] != nil {
 					text := qu.ObjToString(v.Value)
+					before = text
 					text = clear.OtherClean(key, text)
 					if text != "" {
 						v.Value = text
 					}
+					BeforeAddClearFnLog("clearsymbol", "特殊符号清理", j.SourceMid, before, "clear_symbol", v, e)
 				}
+				//AddClearFnLog("clearfn", j.SourceMid, v.Value, extinfo, v.Code, "函数清理", key, e.TaskInfo)
 				lockclear.Unlock()
 			}
 			//项目编号,采购单位权重清理
@@ -629,7 +639,7 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
 //抽取-规则
 func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) {
 	defer qu.Catch()
-	//废标、流标、ppp等跳过
+	//根据field配置项目,是否抽取。例如:废标、流标等跳过,
 	b := IsExtract(in.Field, j.Title, j.Content)
 	if !b {
 		return
@@ -990,6 +1000,44 @@ func AddExtLog(ftype, sid string, before interface{}, extinfo interface{}, v *Re
 	ExtLogs[t] = append(ExtLogs[t], logdata)
 	lock.Unlock()
 }
+// BeforeAddClearFnLog builds the extinfo payload for a cleaning step from ext
+// and hands it to AddClearFnLog together with e.TaskInfo.
+//
+// ftype and name become the log entry's type and name, sid and before are
+// forwarded unchanged, and matchtype is stored inside the extinfo record.
+// ext.Value is read at call time, so callers invoke this after assigning the
+// cleaned value.
+func BeforeAddClearFnLog(ftype, name, sid, before, matchtype string, ext *ju.ExtField, e *ExtractTask) {
+	// Single record describing the field as it stands right now.
+	record := map[string]interface{}{
+		"field":     ext.Field,
+		"code":      ext.Code,
+		"type":      ftype,
+		"matchtype": matchtype,
+		"extfrom":   ext.ExtFrom,
+		"value":     ext.Value,
+	}
+	// extinfo maps the field name to a one-element list of records.
+	extinfo := map[string]interface{}{
+		ext.Field: []map[string]interface{}{record},
+	}
+	AddClearFnLog(ftype, sid, before, extinfo, ext.Code, name, ext.Field, e.TaskInfo)
+}
+// AddClearFnLog appends one cleaning log entry to ExtLogs[t].
+//
+// Nothing is recorded when t.IsEtxLog is false. Otherwise the entry carries
+// the given code, name, ftype (stored as "type"), field, before value, extinfo
+// and sid, plus t.Version, t.Name and the current Unix time. "ruletext" is
+// always empty and "islua" is always false for these entries.
+func AddClearFnLog(ftype, sid string, before interface{}, extinfo interface{}, code, name, field string, t *TaskInfo) {
+	defer qu.Catch()
+	if !t.IsEtxLog {
+		return
+	}
+	logdata := map[string]interface{}{
+		"code":       code,
+		"name":       name,
+		"type":       ftype,
+		"ruletext":   "",
+		"islua":      false,
+		"field":      field,
+		"version":    t.Version,
+		"taskname":   t.Name,
+		"before":     before,
+		"extinfo":    extinfo,
+		"sid":        sid,
+		"comeintime": time.Now().Unix(),
+	}
+	// Unlock via defer: if the write below panics (for example on a nil
+	// ExtLogs map), the lock must still be released. qu.Catch presumably
+	// recovers the panic, which would otherwise leave the lock held and
+	// block every later logger.
+	lock.Lock()
+	defer lock.Unlock()
+	ExtLogs[t] = append(ExtLogs[t], logdata)
+}
 
 //保存抽取日志
 func SaveExtLog() {

+ 4 - 4
src/res/isextract.json

@@ -1,11 +1,11 @@
 {
     "bidamount": [
-        "(不足三家|废标流标|废标公告|流标公告|ppp项目|PPP项目)"
+        "(不足三家|废标流标|废标公告|流标公告)"
     ],
     "winner": [
-        "(不足三家|废标流标|废标公告|流标公告|ppp项目|PPP项目)"
+        "(不足三家|废标流标|废标公告|流标公告)"
     ],
-	"filter":[
-		"(?i)(PPP项目[::]否|非PPP项目)"
+	"title":[
+        "(?i)(ppp项目)"
 	]
 }