zhangjinkun 6 년 전
부모
커밋
c2836fd193
3개의 파일 변경, 68개의 추가 및 71개의 삭제
  1. 55 38
      src/jy/admin/rulecheck.go
  2. 10 11
      src/jy/extract/extract.go
  3. 3 22
      src/main_test.go

+ 55 - 38
src/jy/admin/rulecheck.go

@@ -2,6 +2,7 @@
 package admin
 
 import (
+	"fmt"
 	"jy/extract"
 	. "jy/mongodbutil"
 	ju "jy/util"
@@ -149,58 +150,74 @@ func getCheckInfos() *[]map[string]interface{} {
 
 //正则前置过滤检查
 func checkPreReg(content, ruleText string) string {
-	tmp := strings.Split(ruleText, "__")
-	if len(tmp) == 2 {
-		reg := regexp.MustCompile(tmp[0])
-		return reg.ReplaceAllString(content, tmp[1])
-	} else {
-		reg := regexp.MustCompile(tmp[0])
-		return reg.ReplaceAllString(content, "")
-	}
+	tmpstr := ""
+	qu.Try(func() {
+		tmp := strings.Split(ruleText, "__")
+		if len(tmp) == 2 {
+			reg := regexp.MustCompile(tmp[0])
+			tmpstr = reg.ReplaceAllString(content, tmp[1])
+		} else {
+			reg := regexp.MustCompile(tmp[0])
+			tmpstr = reg.ReplaceAllString(content, "")
+		}
+	}, func(err interface{}) {
+		tmpstr = fmt.Sprint(err)
+	})
+	return tmpstr
 }
 
 //正则后置过滤检查
 func checkBackReg(content, ruleText string) string {
-	tmp := strings.Split(ruleText, "__")
-	if len(tmp) == 2 {
-		reg := regexp.MustCompile(tmp[0])
-		return reg.ReplaceAllString(content, tmp[1])
-	} else {
-		reg := regexp.MustCompile(tmp[0])
-		return reg.ReplaceAllString(content, "")
-	}
+	tmpstr := ""
+	qu.Try(func() {
+		tmp := strings.Split(ruleText, "__")
+		if len(tmp) == 2 {
+			reg := regexp.MustCompile(tmp[0])
+			tmpstr = reg.ReplaceAllString(content, tmp[1])
+		} else {
+			reg := regexp.MustCompile(tmp[0])
+			tmpstr = reg.ReplaceAllString(content, "")
+		}
+	}, func(err interface{}) {
+		tmpstr = fmt.Sprint(err)
+	})
+	return tmpstr
 }
 
 //正则抽取检查
 func checkCoreReg(field, content, ruleText string) map[string]string {
 	rep := map[string]string{}
-	tmp := strings.Split(ruleText, "__")
-	if len(tmp) == 2 {
-		epos := strings.Split(tmp[1], ",")
-		posm := map[string]int{}
-		for _, v := range epos {
-			ks := strings.Split(v, ":")
-			if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
-				posm[ks[1]] = qu.IntAll(ks[0])
-			} else {
-				posm[field] = qu.IntAll(ks[0])
+	qu.Try(func() {
+		tmp := strings.Split(ruleText, "__")
+		if len(tmp) == 2 {
+			epos := strings.Split(tmp[1], ",")
+			posm := map[string]int{}
+			for _, v := range epos {
+				ks := strings.Split(v, ":")
+				if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
+					posm[ks[1]] = qu.IntAll(ks[0])
+				} else {
+					posm[field] = qu.IntAll(ks[0])
+				}
 			}
-		}
-		reg := regexp.MustCompile(tmp[0])
-		apos := reg.FindAllStringSubmatchIndex(content, -1)
-		if len(apos) > 0 {
-			pos := apos[0]
-			for k, p := range posm {
-				if len(pos) > p {
-					if pos[p] == -1 || pos[p+1] == -1 {
-						continue
+			reg := regexp.MustCompile(tmp[0])
+			apos := reg.FindAllStringSubmatchIndex(content, -1)
+			if len(apos) > 0 {
+				pos := apos[0]
+				for k, p := range posm {
+					if len(pos) > p {
+						if pos[p] == -1 || pos[p+1] == -1 {
+							continue
+						}
+						val := content[pos[p]:pos[p+1]]
+						rep[k] = val
 					}
-					val := content[pos[p]:pos[p+1]]
-					rep[k] = val
 				}
 			}
 		}
-	}
+	}, func(err interface{}) {
+		rep["err"] = fmt.Sprint(err)
+	})
 	return rep
 }
 

+ 10 - 11
src/jy/extract/extract.go

@@ -69,17 +69,16 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 //启动抽取
 func StartExtractTaskId(taskId string) bool {
 	ext := TaskList[taskId]
+	ext = &ExtractTask{}
+	ext.Id = taskId
+	ext.InitTaskInfo()
+	ext.TaskInfo.DB = db.MgoFactory(1, 3, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
+	ext.InitRulePres()
+	ext.InitRuleBacks()
+	ext.InitRuleCore()
+	ext.InitTag()
+	ext.InitClearFn()
 	if ext == nil {
-		ext = &ExtractTask{}
-		ext.Id = taskId
-		ext.IsRun = true
-		ext.InitTaskInfo()
-		ext.TaskInfo.DB = db.MgoFactory(1, 3, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
-		ext.InitRulePres()
-		ext.InitRuleBacks()
-		ext.InitRuleCore()
-		ext.InitTag()
-		ext.InitClearFn()
 		//只启动一次taskId
 		go RunExtractTask(ext)
 	}
@@ -120,7 +119,7 @@ func RunExtractTask(ext *ExtractTask) {
 	}
 	//更新task.s_extlastid
 	db.Mgo.UpdateById("task", ext.Id, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`)
-	time.AfterFunc(30*time.Minute, func() { RunExtractTask(ext) })
+	time.AfterFunc(1*time.Minute, func() { RunExtractTask(ext) })
 }
 
 //信息预处理

+ 3 - 22
src/main_test.go

@@ -12,8 +12,8 @@ import (
 
 func Test_task(t *testing.T) {
 	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_v3")
-	//extract.StartExtractTaskId("5b8f804025e29a290415aee1")
-	extract.StartExtractTestTask("5b8f804025e29a290415aee1", "5b8dcc45a5cb26b9b7f68469", "10", "result_v3", "track_v3")
+	extract.StartExtractTaskId("5b8f804025e29a290415aee1")
+	//extract.StartExtractTestTask("5b8f804025e29a290415aee1", "5b8dcc45a5cb26b9b7f68469", "10", "result_v3", "track_v3")
 	time.Sleep(300 * time.Second)
 }
 func Test_reg(t *testing.T) {
@@ -25,29 +25,10 @@ func Test_reg(t *testing.T) {
 		log.Println(k, v[1])
 	}
 }
-func Test_checkreg(t *testing.T) {
-	context := ` 项目名称:      新碶街道2018年美女姜河、向家村河、塘湾河①河道清淤工程招标公告`
-	rule := `(.*)项目名称[::][\n\s ]{0,10}__`
-	str := extract.PreRulesCheckReg(context, rule)
-	log.Println(str)
-	rule = `(.*)招标公告__$1`
-	str = extract.BackRulesCheckReg(str, rule)
-	log.Println(str)
-
-	context = `新碶街道2018年美女姜河、向家村河、塘湾河①河道清淤工程招标公告郑州`
-	rule = `(.*)招标公告(.*)__2:projectname,4:city`
-	tmp := extract.ExtRulesCheckReg("projectname", context, rule)
-	log.Println(tmp)
-
-	rule = `(关于|就)?(.{6,70})(招标|中标|成交|延期|变更)公告__4`
-	context = `关于新碶街道2018年美女姜河、向家村河、塘湾河①河道清淤工程招标公告`
-	tmp = extract.ExtRulesCheckReg("projectname", context, rule)
-	log.Println(tmp)
-}
 
 func Test_paths(t *testing.T) {
 	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_v3")
-	tracks := track.GetTrackPath("5b8dd276a5cb26b9b7faaa7c", "projectname", "v3_track")
+	tracks := track.GetTrackPath("5b8dd276a5cb26b9b7faaa7c", "projectname", "rack_v3", "result_v3")
 	for code, v := range tracks {
 		if tmp, ok := v.([]map[string]interface{}); ok {
 			for k, v := range tmp {