zhangjinkun hace 6 años
padre
commit
2c0fdfb0d8
Se han modificado 3 ficheros con 88 adiciones y 63 borrados
  1. 1 1
      src/config.json
  2. 18 16
      src/jy/extract/extract.go
  3. 69 46
      src/jy/extract/extractInit.go

+ 1 - 1
src/config.json

@@ -2,5 +2,5 @@
     "port": "9090",
     "mgodb": "192.168.3.207:27082",
     "dbsize": 5,
-    "dbname": "extract_v3"
+    "dbname": "extract_kf"
 } 

+ 18 - 16
src/jy/extract/extract.go

@@ -1,7 +1,6 @@
 package extract
 
 import (
-	"encoding/json"
 	"fmt"
 	"jy/clear"
 	db "jy/mongodbutil"
@@ -68,21 +67,27 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 
 //启动抽取
 func StartExtractTaskId(taskId string) bool {
+	isgo := false
 	ext := TaskList[taskId]
-	ext = &ExtractTask{}
-	ext.Id = taskId
-	ext.InitTaskInfo()
+	if ext == nil {
+		ext = &ExtractTask{}
+		ext.Id = taskId
+		ext.InitTaskInfo()
+		isgo = true
+	} else {
+		ext.Id = taskId
+		ext.InitTaskInfo()
+	}
 	ext.TaskInfo.DB = db.MgoFactory(1, 3, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
 	ext.InitRulePres()
 	ext.InitRuleBacks()
 	ext.InitRuleCore()
 	ext.InitTag()
 	ext.InitClearFn()
-	if ext == nil {
-		//只启动一次taskId
-		go RunExtractTask(ext)
-	}
 	ext.IsRun = true
+	if isgo {
+		go RunExtractTask(taskId)
+	}
 	TaskList[taskId] = ext
 	return true
 }
@@ -100,10 +105,8 @@ func StopExtractTaskId(taskId string) bool {
 }
 
 //开始抽取
-func RunExtractTask(ext *ExtractTask) {
-	if !ext.IsRun {
-		return
-	}
+func RunExtractTask(taskId string) {
+	ext := TaskList[taskId]
 	query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}}
 	list, _ := ext.TaskInfo.DB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, -1, -1)
 	for k, v := range *list {
@@ -115,11 +118,10 @@ func RunExtractTask(ext *ExtractTask) {
 		ext.TaskInfo.ProcessPool <- true
 		go ext.ExtractProcess(j)
 		ext.TaskInfo.LastExtId = qu.BsonIdToSId(v["_id"])
-		time.Sleep(1 * time.Second)
 	}
 	//更新task.s_extlastid
 	db.Mgo.UpdateById("task", ext.Id, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`)
-	time.AfterFunc(1*time.Minute, func() { RunExtractTask(ext) })
+	time.AfterFunc(1*time.Minute, func() { RunExtractTask(taskId) })
 }
 
 //信息预处理
@@ -217,8 +219,8 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
 				v.Value = data[0]
 			}
 		}
-		bs, _ := json.Marshal(j.Result)
-		log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
+		//bs, _ := json.Marshal(j.Result)
+		//log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
 		//分析抽取结果并保存 todo
 		AnalysisSaveResult(j.Data, j.Result, e.TaskInfo)
 

+ 69 - 46
src/jy/extract/extractInit.go

@@ -3,6 +3,7 @@ package extract
 
 import (
 	db "jy/mongodbutil"
+	"log"
 	qu "qfw/util"
 	"regexp"
 	"strings"
@@ -85,6 +86,7 @@ func (e *ExtractTask) InitTestTaskInfo(resultcoll, trackcoll string) {
 //加载任务信息
 func (e *ExtractTask) InitTaskInfo() {
 	task, _ := db.Mgo.FindById("task", e.Id, nil)
+	log.Println("task", task)
 	if len(*task) > 1 {
 		v, _ := db.Mgo.FindOne("version", `{"version":"`+(*task)["s_version"].(string)+`"}`)
 		e.TaskInfo = &TaskInfo{
@@ -100,6 +102,7 @@ func (e *ExtractTask) InitTaskInfo() {
 			LastExtId:   qu.ObjToString((*task)["s_extlastid"]),
 			ProcessPool: make(chan bool, qu.IntAllDef((*task)["i_process"], 1)),
 		}
+		log.Println(e.TaskInfo.Name, e.TaskInfo.ProcessPool)
 	} else {
 		return
 	}
@@ -118,13 +121,17 @@ func (e *ExtractTask) InitRulePres() {
 		if rinfo.IsLua {
 			rinfo.RuleText = v["s_luascript"].(string)
 		} else {
-			rinfo.RuleText = v["s_rule"].(string)
-			tmp := strings.Split(rinfo.RuleText, "__")
-			if len(tmp) == 2 {
-				rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
-			} else {
-				rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
-			}
+			qu.Try(func() {
+				rinfo.RuleText = v["s_rule"].(string)
+				tmp := strings.Split(rinfo.RuleText, "__")
+				if len(tmp) == 2 {
+					rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
+				} else {
+					rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
+				}
+			}, func(err interface{}) {
+				log.Println(rinfo.Code, rinfo.Field, err)
+			})
 		}
 		e.RulePres = append(e.RulePres, rinfo)
 	}
@@ -143,13 +150,17 @@ func (e *ExtractTask) InitRuleBacks() {
 		if rinfo.IsLua {
 			rinfo.RuleText = v["s_luascript"].(string)
 		} else {
-			rinfo.RuleText = v["s_rule"].(string)
-			tmp := strings.Split(rinfo.RuleText, "__")
-			if len(tmp) == 2 {
-				rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
-			} else {
-				rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
-			}
+			qu.Try(func() {
+				rinfo.RuleText = v["s_rule"].(string)
+				tmp := strings.Split(rinfo.RuleText, "__")
+				if len(tmp) == 2 {
+					rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
+				} else {
+					rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
+				}
+			}, func(err interface{}) {
+				log.Println(rinfo.Code, rinfo.Field, err)
+			})
 		}
 		e.RuleBacks = append(e.RuleBacks, rinfo)
 	}
@@ -184,14 +195,18 @@ func (e *ExtractTask) InitRuleCore() {
 				if rinfo.IsLua {
 					rinfo.RuleText = v["s_luascript"].(string)
 				} else {
-					rinfo.RuleText = v["s_rule"].(string)
-					rinfo.Field = v["s_field"].(string)
-					tmp := strings.Split(rinfo.RuleText, "__")
-					if len(tmp) == 2 {
-						rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
-					} else {
-						rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
-					}
+					qu.Try(func() {
+						rinfo.RuleText = v["s_rule"].(string)
+						rinfo.Field = v["s_field"].(string)
+						tmp := strings.Split(rinfo.RuleText, "__")
+						if len(tmp) == 2 {
+							rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
+						} else {
+							rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
+						}
+					}, func(err interface{}) {
+						log.Println(rinfo.Code, rinfo.Field, err)
+					})
 				}
 				rulePres = append(rulePres, rinfo)
 			}
@@ -209,14 +224,18 @@ func (e *ExtractTask) InitRuleCore() {
 				if rinfo.IsLua {
 					rinfo.RuleText = v["s_luascript"].(string)
 				} else {
-					rinfo.RuleText = v["s_rule"].(string)
-					rinfo.Field = v["s_field"].(string)
-					tmp := strings.Split(rinfo.RuleText, "__")
-					if len(tmp) == 2 {
-						rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
-					} else {
-						rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
-					}
+					qu.Try(func() {
+						rinfo.RuleText = v["s_rule"].(string)
+						rinfo.Field = v["s_field"].(string)
+						tmp := strings.Split(rinfo.RuleText, "__")
+						if len(tmp) == 2 {
+							rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: tmp[1]}
+						} else {
+							rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Replace: ""}
+						}
+					}, func(err interface{}) {
+						log.Println(rinfo.Code, rinfo.Field, err)
+					})
 				}
 				ruleBacks = append(ruleBacks, rinfo)
 			}
@@ -244,24 +263,28 @@ func (e *ExtractTask) InitRuleCore() {
 						rinfo.IsHasFields = true
 					}*/
 				} else {
-					rinfo.RuleText = v["s_rule"].(string)
-					rinfo.Field = v["s_field"].(string)
-					tmp := strings.Split(rinfo.RuleText, "__")
-					if len(tmp) == 2 {
-						epos := strings.Split(tmp[1], ",")
-						posm := map[string]int{}
-						for _, v := range epos {
-							ks := strings.Split(v, ":")
-							if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
-								posm[ks[1]] = qu.IntAll(ks[0])
-							} else { //(.*)招标公告__2
-								posm[rinfo.Field] = qu.IntAll(ks[0])
+					qu.Try(func() {
+						rinfo.RuleText = v["s_rule"].(string)
+						rinfo.Field = v["s_field"].(string)
+						tmp := strings.Split(rinfo.RuleText, "__")
+						if len(tmp) == 2 {
+							epos := strings.Split(tmp[1], ",")
+							posm := map[string]int{}
+							for _, v := range epos {
+								ks := strings.Split(v, ":")
+								if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
+									posm[ks[1]] = qu.IntAll(ks[0])
+								} else { //(.*)招标公告__2
+									posm[rinfo.Field] = qu.IntAll(ks[0])
+								}
 							}
+							rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Bextract: true, ExtractPos: posm}
+						} else {
+							rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Bextract: false}
 						}
-						rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Bextract: true, ExtractPos: posm}
-					} else {
-						rinfo.RegCore = &ExtReg{Reg: regexp.MustCompile(tmp[0]), Bextract: false}
-					}
+					}, func(err interface{}) {
+						log.Println(rinfo.Code, rinfo.Field, err)
+					})
 				}
 				ruleCores = append(ruleCores, rinfo)
 			}