|
@@ -185,6 +185,11 @@ func RunExtractTask(taskId string) {
|
|
|
if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
|
|
|
continue
|
|
|
}
|
|
|
+ //根据标题判断是否抽取
|
|
|
+ b := IsExtract("title", qu.ObjToString(v["title"]), "")
|
|
|
+ if !b {
|
|
|
+ continue
|
|
|
+ }
|
|
|
_id := qu.BsonIdToSId(v["_id"])
|
|
|
//log.Debug(_id)
|
|
|
if !ext.IsRun {
|
|
@@ -446,17 +451,22 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
|
|
|
cfn := e.ClearFn[key]
|
|
|
lockclear.Unlock()
|
|
|
data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
|
|
|
+ before, _ := v.Value.(string)
|
|
|
v.Value = data[0]
|
|
|
+ BeforeAddClearFnLog("clearcfn", "函数清理", j.SourceMid, before, "clear_cfn", v, e)
|
|
|
+ //添加行数清理的日志
|
|
|
//清理特殊符号
|
|
|
lockclear.Lock()
|
|
|
- if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
|
|
|
- clear.MesField[key] != nil {
|
|
|
+ if clear.AsyField[key] != nil || clear.SymField[key] != nil || clear.MesField[key] != nil {
|
|
|
text := qu.ObjToString(v.Value)
|
|
|
+ before = text
|
|
|
text = clear.OtherClean(key, text)
|
|
|
if text != "" {
|
|
|
v.Value = text
|
|
|
}
|
|
|
+ BeforeAddClearFnLog("clearsymbol", "特殊符号清理", j.SourceMid, before, "clear_symbol", v, e)
|
|
|
}
|
|
|
+ //AddClearFnLog("clearfn", j.SourceMid, v.Value, extinfo, v.Code, "函数清理", key, e.TaskInfo)
|
|
|
lockclear.Unlock()
|
|
|
}
|
|
|
//项目编号,采购单位权重清理
|
|
@@ -629,7 +639,7 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
|
|
|
//抽取-规则
|
|
|
func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) {
|
|
|
defer qu.Catch()
|
|
|
- //废标、流标、ppp等跳过
|
|
|
+ //根据field配置项目,是否抽取。例如:废标、流标等跳过,
|
|
|
b := IsExtract(in.Field, j.Title, j.Content)
|
|
|
if !b {
|
|
|
return
|
|
@@ -990,6 +1000,44 @@ func AddExtLog(ftype, sid string, before interface{}, extinfo interface{}, v *Re
|
|
|
ExtLogs[t] = append(ExtLogs[t], logdata)
|
|
|
lock.Unlock()
|
|
|
}
|
|
|
+func BeforeAddClearFnLog(ftype, name, sid, before, matchtype string, ext *ju.ExtField, e *ExtractTask) {
|
|
|
+ exts := []map[string]interface{}{}
|
|
|
+ exts = append(exts, map[string]interface{}{
|
|
|
+ "field": ext.Field,
|
|
|
+ "code": ext.Code,
|
|
|
+ "type": ftype,
|
|
|
+ "matchtype": matchtype,
|
|
|
+ "extfrom": ext.ExtFrom,
|
|
|
+ "value": ext.Value,
|
|
|
+ })
|
|
|
+ extinfo := map[string]interface{}{
|
|
|
+ ext.Field: exts,
|
|
|
+ }
|
|
|
+ AddClearFnLog(ftype, sid, before, extinfo, ext.Code, name, ext.Field, e.TaskInfo)
|
|
|
+}
|
|
|
+func AddClearFnLog(ftype, sid string, before interface{}, extinfo interface{}, code, name, field string, t *TaskInfo) {
|
|
|
+ defer qu.Catch()
|
|
|
+ if !t.IsEtxLog {
|
|
|
+ return
|
|
|
+ }
|
|
|
+ logdata := map[string]interface{}{
|
|
|
+ "code": code,
|
|
|
+ "name": name,
|
|
|
+ "type": ftype,
|
|
|
+ "ruletext": "",
|
|
|
+ "islua": false,
|
|
|
+ "field": field,
|
|
|
+ "version": t.Version,
|
|
|
+ "taskname": t.Name,
|
|
|
+ "before": before,
|
|
|
+ "extinfo": extinfo,
|
|
|
+ "sid": sid,
|
|
|
+ "comeintime": time.Now().Unix(),
|
|
|
+ }
|
|
|
+ lock.Lock()
|
|
|
+ ExtLogs[t] = append(ExtLogs[t], logdata)
|
|
|
+ lock.Unlock()
|
|
|
+}
|
|
|
|
|
|
//保存抽取日志
|
|
|
func SaveExtLog() {
|