Ver Fonte

Merge branch 'dev3.2' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.2

zhangjinkun há 6 anos atrás
pai
commit
1a23e4e6aa

+ 4 - 1
src/jy/pretreated/analykv.go

@@ -196,7 +196,7 @@ func FindKv(con, tag string, from int) (m *SortMap) {
 					v = str1
 					if k0 < len(strs)-1 {
 						s2 := u.TrimLRSpace(strings.Join(strs[k0+1], ""), "")
-						if len([]rune(s2)) < 10 && !Key.MatchString(s2) {
+						if len([]rune(s2)) < 10 && !regexp.MustCompile("^[0-9]+[、]+$").MatchString(s2) && !Key.MatchString(s2) {
 							v += s2
 							k0++
 						}
@@ -334,6 +334,9 @@ func FindKv(con, tag string, from int) (m *SortMap) {
 }
 
 func keydetail(k, v string, m *SortMap, tag string, pos int, strs [][]string, matchMap map[string]map[string]bool, from int) {
+	if regexp.MustCompile("^[0-9]+[、]+$").MatchString(v){
+		return
+	}
 	k = space.ReplaceAllString(k, "")
 	if len([]rune(k)) > 1 {
 		if len([]rune(k)) < 5 && strings.HasPrefix(k, "联系") || ContactInfoMustReg.MatchString(k) {

+ 2 - 0
src/jy/pretreated/analystep.go

@@ -6,6 +6,7 @@ package pretreated
 import (
 	"encoding/json"
 	"jy/util"
+	//"log"
 	"strings"
 
 	"github.com/PuerkitoBio/goquery"
@@ -40,6 +41,7 @@ func AnalyStart(job *util.Job) {
 					processTableInBlock(bl_bl, job)
 				}
 			}
+			FindProjectCode(bl.Text, job) //匹配项目编号
 			processTableInBlock(bl, job)
 			//新加 未分块table中未能解析到中标候选人,从正文中解析
 			if job.Winnerorder == nil || len(job.Winnerorder) == 0 {

+ 2 - 2
src/jy/pretreated/analytable.go

@@ -107,8 +107,8 @@ var (
 	underline                   = regexp.MustCompile("_+$")
 	iswinnertabletag            = regexp.MustCompile("(中标|候选人|成交|结果)")
 	nswinnertabletag            = regexp.MustCompile("[评得分估]+")
-	projectcodeReg              = regexp.MustCompile(`((|\(|\[){1}(编号|项目编号|标段编号){1}(:|:)(.){4,30}()|\)|\])`)
-	projectcodeReg2             = regexp.MustCompile(`((?:^|\n)编号|项目编号|标段编号){1}(:|:)(.){4,30}[0-9a-zA-Z]`)
+	projectcodeReg              = regexp.MustCompile(`((|\(|\[){1}(编号|项目编号|标段编号|招标编号){1}(:|:)(.){4,30}()|\)|\])`)
+	projectcodeReg2             = regexp.MustCompile(`((?:^|\n)编号|项目编号|标段编号){1}(:|:)(.){4,30}[0-9a-zA-Z]`)
 	projectcodeReg3             = regexp.MustCompile("(^询价单编号[A-Za-z0-9/-]*|公告编号[A-Za-z0-9/-]*)")
 	jsonReg                     = regexp.MustCompile(`\{.+:[^}]*\} `) //  \{".*\":\".+\"}
 	regHz                       = regexp.MustCompile("[\u4e00-\u9fa5]")

+ 4 - 4
src/res/fieldscore.json

@@ -331,19 +331,19 @@
         "positivewords": [
             {
                 "describe": "有关键字加分",
-                "regstr": "(财采|招字|财购){1}",
+                "regstr": "(财采|招字|财购|赣购){1}",
                 "score": 2
             },
             {
                 "describe": "号结尾加分",
                 "regstr": ".{4,35}(号)$",
-                "score": 3
+                "score": 2
             }
         ],
         "negativewords": [
             {
-                "describe": "纯数字减分",
-                "regstr": "^\\d{8,}$",
+                "describe": "长度年月日纯数字减分",
+                "regstr": "^\\d{8}$",
                 "score": -1
             },
             {