浏览代码

抽取逻辑

zhangjinkun 6 年之前
父节点
当前提交
f2b6aa894a

+ 58 - 25
src/jy/extract/extract.go

@@ -1,6 +1,7 @@
 package extract
 
 import (
+	"encoding/json"
 	db "jy/mongodbutil"
 	ju "jy/util"
 	"log"
@@ -24,9 +25,10 @@ type ExtReg struct {
 	Reg        *regexp.Regexp
 	Replace    string
 	Bextract   bool
-	ExtractPos int
+	ExtractPos map[string]int
 }
 type RuleCore struct {
+	LuaLogic  string        //进入逻辑
 	RulePres  []*RegLuaInfo //前置规则
 	RuleBacks []*RegLuaInfo //后置规则
 	RuleCores []*RegLuaInfo //抽取规则
@@ -40,13 +42,13 @@ type TaskInfo struct {
 	ProcessPool                  chan bool //任务进程池
 }
 type ExtField struct {
-	Field  string         //属性
-	Value  map[string]int //属性值:出现次数
-	ExtNum int            //抽取次数
+	Field string        //属性
+	Value []interface{} //抽取结果
 }
 type ExtractTask struct {
 	Id        string        //任务id
 	IsRun     bool          //是否启动
+	Content   string        //信息内容
 	TaskInfo  *TaskInfo     //任务信息
 	RulePres  []*RegLuaInfo //前置规则
 	RuleBacks []*RegLuaInfo //后置规则
@@ -134,7 +136,7 @@ func (e *ExtractTask) InitTaskInfo() {
 //加载前置规则
 func (e *ExtractTask) InitRulePres() {
 	defer qu.Catch()
-	list, _ := db.Mgo.Find("rule_pre", `{"s_version":"`+e.TaskInfo.Version+`"}`, `{"_id":-1}`, nil, false, -1, -1)
+	list, _ := db.Mgo.Find("rule_pre", `{"s_version":"`+e.TaskInfo.Version+`"}`, nil, nil, false, -1, -1)
 	for _, v := range *list {
 		rinfo := &RegLuaInfo{
 			Code:  v["s_code"].(string),
@@ -159,7 +161,7 @@ func (e *ExtractTask) InitRulePres() {
 //加载后置规则
 func (e *ExtractTask) InitRuleBacks() {
 	defer qu.Catch()
-	list, _ := db.Mgo.Find("rule_back", `{"s_version":"`+e.TaskInfo.Version+`"}`, `{"_id":-1}`, nil, false, -1, -1)
+	list, _ := db.Mgo.Find("rule_back", `{"s_version":"`+e.TaskInfo.Version+`"}`, nil, nil, false, -1, -1)
 	for _, v := range *list {
 		rinfo := &RegLuaInfo{
 			Code:  v["s_code"].(string),
@@ -184,15 +186,17 @@ func (e *ExtractTask) InitRuleBacks() {
 //加载抽取规则
 func (e *ExtractTask) InitRuleCore() {
 	defer qu.Catch()
-	list, _ := db.Mgo.Find("rule_logic", `{"s_version":"`+e.TaskInfo.Version+`"}`, `{"_id":-1}`, nil, false, -1, -1)
+	list, _ := db.Mgo.Find("rule_logic", `{"s_version":"`+e.TaskInfo.Version+`"}`, nil, nil, false, -1, -1)
 	for _, vv := range *list {
 		if b, _ := vv["isuse"].(bool); !b {
 			continue
 		}
 		rcore := &RuleCore{}
+		//是否进入逻辑脚本
+		rcore.LuaLogic = qu.ObjToString(vv["s_luascript"])
 		//前置规则
 		rulePres := []*RegLuaInfo{}
-		plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","s_version":"`+e.TaskInfo.Version+`"}`, `{"_id":-1}`, nil, false, -1, -1)
+		plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","s_version":"`+e.TaskInfo.Version+`"}`, nil, nil, false, -1, -1)
 		for _, v := range *plist {
 			rinfo := &RegLuaInfo{
 				Code:  v["s_code"].(string),
@@ -217,7 +221,7 @@ func (e *ExtractTask) InitRuleCore() {
 
 		//后置规则
 		ruleBacks := []*RegLuaInfo{}
-		blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","s_version":"`+e.TaskInfo.Version+`"}`, `{"_id":-1}`, nil, false, -1, -1)
+		blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","s_version":"`+e.TaskInfo.Version+`"}`, nil, nil, false, -1, -1)
 		for _, v := range *blist {
 			rinfo := &RegLuaInfo{
 				Code:  v["s_code"].(string),
@@ -242,7 +246,7 @@ func (e *ExtractTask) InitRuleCore() {
 
 		//抽取规则
 		ruleCores := []*RegLuaInfo{}
-		clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","s_version":"`+e.TaskInfo.Version+`"}`, `{"_id":-1}`, nil, false, -1, -1)
+		clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","s_version":"`+e.TaskInfo.Version+`"}`, nil, nil, false, -1, -1)
 		for _, v := range *clist {
 			if b, _ := v["isuse"].(bool); !b {
 				continue
@@ -259,9 +263,19 @@ func (e *ExtractTask) InitRuleCore() {
 				rinfo.Field = v["s_field"].(string)
 				tmp := strings.Split(rinfo.RuleText, "__")
 				if len(tmp) == 2 {
-					rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Bextract: true, ExtractPos: qu.IntAll(tmp[1])}
+					epos := strings.Split(tmp[1], ",")
+					posm := map[string]int{}
+					for _, v := range epos {
+						ks := strings.Split(v, ":")
+						if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
+							posm[ks[1]] = qu.IntAll(ks[0])
+						} else { //(.*)招标公告__2
+							posm[rinfo.Field] = qu.IntAll(ks[0])
+						}
+					}
+					rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Bextract: true, ExtractPos: posm}
 				} else {
-					rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Bextract: false, ExtractPos: 0}
+					rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Bextract: false}
 				}
 			}
 			ruleCores = append(ruleCores, rinfo)
@@ -282,9 +296,13 @@ func (e *ExtractTask) ExtractProcess(doc map[string]interface{}) {
 		for _, v := range e.RulePres {
 			doc = ExtRegPre(doc, v, e.TaskInfo)
 		}
-		log.Println("前置规则,detail", doc["detail"])
+		log.Println("全局前置规则", doc)
 		//抽取规则
 		for _, vc := range e.RuleCores {
+			//是否进入逻辑
+			if !ju.Logic(vc.LuaLogic, doc) {
+				continue
+			}
 			data := map[string]interface{}{}
 			//抽取-前置规则
 			tmpdoc := map[string]interface{}{}
@@ -303,6 +321,11 @@ func (e *ExtractTask) ExtractProcess(doc map[string]interface{}) {
 				data = ExtRegBack(data, v, e.TaskInfo)
 			}
 			log.Println("抽取-后置规则", data)
+			//全局后置规则
+			for _, v := range e.RuleBacks {
+				data = ExtRegBack(data, v, e.TaskInfo)
+			}
+			log.Println("全局后置规则", data)
 
 			//抽取结果赋值
 			for k, v := range data {
@@ -310,15 +333,16 @@ func (e *ExtractTask) ExtractProcess(doc map[string]interface{}) {
 					continue
 				}
 				if result[k] == nil {
-					result[k] = &ExtField{Field: k, Value: map[string]int{qu.ObjToString(v): 1}, ExtNum: 1}
+					result[k] = &ExtField{Field: k, Value: []interface{}{v}}
 				} else {
-					ef := result[k]
-					ef.Value[qu.ObjToString(v)] += 1
-					ef.ExtNum += 1
+					result[k].Value = append(result[k].Value, v)
 				}
 			}
-			//抽取结果保存 todo
+			bs, _ := json.Marshal(result)
+			log.Println("抽取结果", string(bs))
 		}
+		//抽取结果保存 todo
+
 	}, func(err interface{}) {
 		log.Println(err)
 		<-e.TaskInfo.ProcessPool
@@ -364,8 +388,11 @@ func ExtRegCore(doc map[string]interface{}, v *RegLuaInfo, t *TaskInfo) map[stri
 			apos := v.RegCore.Reg.FindAllStringSubmatchIndex(text, -1)
 			if len(apos) > 0 {
 				pos := apos[0]
-				if len(pos)-1 > v.RegCore.ExtractPos {
-					doc[v.Field] = text[pos[v.RegCore.ExtractPos]:pos[v.RegCore.ExtractPos+1]]
+				for k, p := range v.RegCore.ExtractPos {
+					if len(pos) > p {
+						doc[k] = text[pos[p]:pos[p+1]]
+						//log.Println(k, doc[k])
+					}
 				}
 			}
 		} else {
@@ -388,10 +415,13 @@ func ExtRegBack(doc map[string]interface{}, v *RegLuaInfo, t *TaskInfo) map[stri
 	} else {
 		tmp := doc
 		if v.Field != "" && qu.ObjToString(doc[v.Field]) != "" {
-			doc[v.Field] = v.RegPreBac.Reg.ReplaceAllString(qu.ObjToString(doc[v.Field]), "")
+			doc[v.Field] = v.RegPreBac.Reg.ReplaceAllString(qu.ObjToString(doc[v.Field]), v.RegPreBac.Replace)
 		} else {
 			for k, val := range doc {
-				doc[k] = v.RegPreBac.Reg.ReplaceAllString(qu.ObjToString(val), "")
+				if k == "_id" || k == "detail" || qu.ObjToString(val) == "" {
+					continue
+				}
+				doc[k] = v.RegPreBac.Reg.ReplaceAllString(qu.ObjToString(val), v.RegPreBac.Replace)
 			}
 		}
 		AddExtLog(tmp, doc, v, t) //抽取日志
@@ -400,7 +430,7 @@ func ExtRegBack(doc map[string]interface{}, v *RegLuaInfo, t *TaskInfo) map[stri
 }
 
 //抽取日志
-func AddExtLog(before, extifno map[string]interface{}, v *RegLuaInfo, t *TaskInfo) {
+func AddExtLog(before, extinfo map[string]interface{}, v *RegLuaInfo, t *TaskInfo) {
 	if !t.IsEtxLog {
 		return
 	}
@@ -412,10 +442,12 @@ func AddExtLog(before, extifno map[string]interface{}, v *RegLuaInfo, t *TaskInf
 		"version":    t.Version,
 		"taskname":   t.Name,
 		"before":     before,
-		"extinfo":    extifno,
+		"extinfo":    extinfo,
+		"sid":        qu.BsonIdToSId(before["_id"]),
 		"comeintime": time.Now().Unix(),
 	}
 	lock.Lock()
+
 	ExtLogs[t] = append(ExtLogs[t], logdata)
 	lock.Unlock()
 }
@@ -438,9 +470,10 @@ func SaveExtLog() {
 					v = v[saveLimit:]
 				} else {
 					k.DB.SaveBulk(k.TrackColl, v...)
+					break
 				}
 			}
 		}
 	}
-	time.AfterFunc(2*time.Minute, SaveExtLog)
+	time.AfterFunc(1*time.Minute, SaveExtLog)
 }

+ 38 - 1
src/jy/util/script.go

@@ -2,8 +2,10 @@
 package util
 
 import (
+	"encoding/json"
 	"fmt"
 
+	ljson "github.com/yuin/gopher-json"
 	"github.com/yuin/gopher-lua"
 )
 
@@ -16,6 +18,7 @@ type LuaScript struct {
 func (s *LuaScript) RunScript() map[string]interface{} {
 	data := map[string]interface{}{}
 	s.L = lua.NewState()
+	s.L.PreloadModule("json", ljson.Loader)
 	defer s.L.Close()
 	if err := s.L.DoString(s.Script); err != nil {
 		data["err"] = err.Error()
@@ -38,6 +41,31 @@ func (s *LuaScript) RunScript() map[string]interface{} {
 	}
 	return data
 }
+func Logic(str string, doc map[string]interface{}) bool { // Logic runs the Lua source in str, calls its global function "logic" with doc converted to a Lua table, and reports whether the returned value's string form is "true".
+	L := lua.NewState() // a fresh Lua state is created for every call
+	L.PreloadModule("json", ljson.Loader) // registers the "json" module so the script can require it
+
+	defer L.Close() // release the state on return, including when one of the panics below fires
+	b := false // result stays false unless the script's return value prints as "true"
+	if err := L.DoString(str); err != nil {
+		panic(err) // a script that fails to load or run is escalated as a panic rather than returned
+	} else {
+		tab := MapToLuaTable(L, doc) // doc becomes the single argument passed to logic
+		if err := L.CallByParam(lua.P{
+			Fn:      L.GetGlobal("logic"), // NOTE(review): presumably nil when str defines no global "logic" (e.g. an empty script), which would make the call fail and panic below — confirm
+			NRet:    1,
+			Protect: true, // presumably makes errors raised inside the call come back as err instead of a Lua panic — confirm against gopher-lua docs
+		}, tab); err != nil {
+			panic(err)
+		}
+		ret := L.Get(-1) // read the value on top of the stack (the one requested return value)
+		L.Pop(1)
+		if ret.String() == "true" { // NOTE(review): compares the string form, so a returned Lua string "true" would also match, not only the boolean true
+			b = true
+		}
+	}
+	return b
+}
 
 func MapToLuaTable(l *lua.LState, obj map[string]interface{}) *lua.LTable {
 	tab := l.NewTable()
@@ -56,6 +84,9 @@ func MapToLuaTable(l *lua.LState, obj map[string]interface{}) *lua.LTable {
 			tab.RawSet(lua.LString(k), lua.LBool(val))
 		} else if val, ok := v.(map[string]interface{}); ok {
 			tab.RawSet(lua.LString(k), MapToLuaTable(l, val))
+		} else if val, ok := v.([]interface{}); ok {
+			bs, _ := json.Marshal(val)
+			tab.RawSet(lua.LString(k), lua.LString(string(bs)))
 		}
 	}
 	return tab
@@ -69,8 +100,14 @@ func LuaTableToMap(param *lua.LTable) map[string]interface{} {
 			tmp[k] = string(v)
 		} else if v, ok := val.(*lua.LTable); ok {
 			tmp[k] = LuaTableToMap(v)
+		} else if v, ok := val.(*lua.LBool); ok {
+			if v.String() == "true" {
+				tmp[k] = true
+			} else {
+				tmp[k] = false
+			}
 		} else {
-			tmp[k] = val
+			tmp[k] = v
 		}
 	})
 	return tmp

+ 1 - 1
src/web/templates/admin/rule_backlist.html

@@ -125,7 +125,7 @@ $(function () {
 					_tit="新增规则"
 					if(n=="newlua"){
 						_tit="新增脚本"
-						obj={"s_version":"{{.version}}","s_type":"1"}
+						obj={"s_luascript":"function main(doc)\nend","s_version":"{{.version}}","s_type":"1"}
 						tag = com.pushArry(tag,luatag)
 						tag = com.pushArry(tag,hiddentag)
 						islua=true

+ 1 - 1
src/web/templates/admin/rule_logicbacklist.html

@@ -125,7 +125,7 @@ $(function () {
 					_tit="新增规则"
 					if(n=="newlua"){
 						_tit="新增脚本"
-						obj={"sid":"{{.sid}}","s_type":"1","s_version":"{{.version}}"}
+						obj={"s_luascript":"function main(doc)\nend","sid":"{{.sid}}","s_type":"1","s_version":"{{.version}}"}
 						tag = com.pushArry(tag,luatag)
 						tag = com.pushArry(tag,hiddentag)
 						islua=true

+ 9 - 3
src/web/templates/admin/rule_logiclist.html

@@ -104,19 +104,20 @@ $(function () {
 				tag=[{label:"名称",s_label:"s_name",placeholder:"",must:true},
 					{label:"描述",s_label:"s_descript",type:"tpl_text"},
 					{label:"启用",s_label:"isuse",type:"tpl_list_local",list:[{"s_name":"是","_id":true},{"s_name":"否","_id":false}],default:true},
+					{label:"是否适用",s_label:"s_luascript",type:"tpl_text",must:true},
 					{s_label:"_id",type:"tpl_hidden"},
 					{s_label:"s_version",type:"tpl_hidden"}]
-				islua=false
 				if(n=="edit"){
 					_tit="编辑-"+obj.s_name
 				}else{
 					_tit="新增逻辑"
-					obj={"s_version":"{{.version}}","s_type":"0"}
+					obj={"s_luascript":"function logic(doc)\n\treturn true\nend","s_version":"{{.version}}","s_type":"0"}
 				}
 				
 				htmlObj={
 					title:_tit,
 					tag:tag,
+					lua:true,
 					bts:[
 						{label:"保存",class:"btn-primary",
 							fun:function(){
@@ -124,11 +125,16 @@ $(function () {
 								var bcon=true
 								$("#_con").find("input[id!=s_show],textarea").each(function(i,el){
 									var val=$(el).val();
+									if(el.id=="s_luascript"){
+										val=editor_1.getValue()
+										obj[el.id]=val
+									}else{
+										obj[el.id]=$(el).val()
+									}
 									if(el.id!="_id"&&$(el).attr("must")&&!val){
 										bcon=false
 										return false
 									}
-									obj[el.id]=$(el).val()
 								})
 								if (bcon){								
 									$.post("/admin/rulelogic/save",obj,function(data){

+ 1 - 1
src/web/templates/admin/rule_logicore.html

@@ -135,7 +135,7 @@ $(function () {
 					_tit="新增规则"
 					if(n=="newlua"){
 						_tit="新增脚本"
-						obj={"sid":"{{.sid}}","s_type":"1","s_version":"{{.version}}"}
+						obj={"s_luascript":"function main(doc)\nend","sid":"{{.sid}}","s_type":"1","s_version":"{{.version}}"}
 						tag = com.pushArry(tag,luatag)
 						tag = com.pushArry(tag,hiddentag)
 						islua=true

+ 1 - 1
src/web/templates/admin/rule_logicprelist.html

@@ -125,7 +125,7 @@ $(function () {
 					_tit="新增规则"
 					if(n=="newlua"){
 						_tit="新增脚本"
-						obj={"sid":"{{.sid}}","s_type":"1","s_version":"{{.version}}"}
+						obj={"s_luascript":"function main(doc)\nend","sid":"{{.sid}}","s_type":"1","s_version":"{{.version}}"}
 						tag = com.pushArry(tag,luatag)
 						tag = com.pushArry(tag,hiddentag)
 						islua=true

+ 1 - 1
src/web/templates/admin/rule_prelist.html

@@ -125,7 +125,7 @@ $(function () {
 					_tit="新增规则"
 					if(n=="newlua"){
 						_tit="新增脚本"
-						obj={"s_version":"{{.version}}","s_type":"1"}
+						obj={"s_luascript":"function main(doc)\nend","s_version":"{{.version}}","s_type":"1"}
 						tag = com.pushArry(tag,luatag)
 						tag = com.pushArry(tag,hiddentag)
 						islua=true