maxiaoshan il y a 5 ans
Parent
commit
9fcd893f3a

+ 2 - 1
src/jy/pretreated/analykv.go

@@ -350,12 +350,13 @@ func keydetail(k, v string, m *SortMap, tag string, pos int, strs [][]string, ma
 				相关竞价人对成交结果有异议的,可自本公告发布之日起三日内书面提出。
 				联系方式:卢明珠 0871-66136373
 			*/
-			if doubtMap[pos-1] { //当识别到中标、采购、代理标签后,对其后的联系人、电话等信息判断是否属于该标签
+			if doubtMap[pos-1] && len(m.Map) == 1 { //当识别到中标、采购、代理标签后,对其后的联系人、电话等信息判断是否属于该标签
 				goto L
 			}
 			num := 0
 			bf := false
 			for i := len(m.Keys) - 1; i > -1; i-- {
+				//u.Debug("k", k)
 				num++
 				if from == 1 && !ContactType["代理机构"].MatchString(k) && ContactType["代理机构"].MatchString(m.Keys[i]) && !IsContactKvHandle(k, matchMap["代理机构"]) {
 					matchMap["代理机构"][k] = true

+ 11 - 11
src/jy/pretreated/analytable.go

@@ -114,7 +114,7 @@ var (
 	regHz                       = regexp.MustCompile("[\u4e00-\u9fa5]")
 	winnerOrderAndBidResult     = regexp.MustCompile("((中标)?候选人|(中标|评标)结果)")
 	WinnerOrderStr              = regexp.MustCompile(`(集团|公司|学校|中心|家具城|门诊|\[大中小\]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行)$`)
-	DoubtReg                    = regexp.MustCompile("((|交易)中心|有(疑问|质疑|异议|意见)|(书面)?提出|不再受理|投诉|质疑|书面形式|监督|交易中心|公示期(限)?|招标|采购)")
+	DoubtReg                    = regexp.MustCompile("(我中心|有(疑问|质疑|异议|意见)|(书面)?提出|不再受理|投诉|质疑|书面形式|监督|公示期(限)?)")
 )
 
 //在解析时,判断表格元素是否隐藏
@@ -244,10 +244,10 @@ func (table *Table) KVFilter(isSite bool, codeSite string) {
 			MergeKvTags(table.StandKV, kvTags)
 		} else {
 			//u.Debug(k, v, "---------")
-			if strings.Contains(k,"总价"){
-				if vvvv,ok := v.([]string);ok && len(vvvv)>0{
+			if strings.Contains(k, "总价") {
+				if vvvv, ok := v.([]string); ok && len(vvvv) > 0 {
 					as.RemoveKey("报价")
-					as.AddKey(k,vvvv[len(vvvv)-1])
+					as.AddKey(k, vvvv[len(vvvv)-1])
 					continue
 				}
 			}
@@ -477,15 +477,15 @@ func (table *Table) sortKVArr(as *SortMap, isSite bool, codeSite string) {
 							tmp.Weight = vv[0].Weight
 							tmp.Key = vv[0].Key
 							tmp.IsInvalid = vv[0].IsInvalid
-							if kk == "单品报价"||kk == "中标金额"||kk == "预算"{
-								if strings.Contains(k,"万"){
-									tmp.Value = vvvvvv+"万"
-								}else if strings.Contains(k,"亿"){
-									tmp.Value = vvvvvv+"亿"
-								}else {
+							if kk == "单品报价" || kk == "中标金额" || kk == "预算" {
+								if strings.Contains(k, "万") {
+									tmp.Value = vvvvvv + "万"
+								} else if strings.Contains(k, "亿") {
+									tmp.Value = vvvvvv + "亿"
+								} else {
 									tmp.Value = vvvvvv
 								}
-							}else {
+							} else {
 								tmp.Value = vvvvvv
 							}
 							table.StandKV[kk] = append(table.StandKV[kk], &tmp)

+ 2 - 2
src/main.go

@@ -13,10 +13,10 @@ import (
 	_ "net/http/pprof"
 	qu "qfw/util"
 
-	log "github.com/donnie4w/go-logger/logger"
 	"qfw/util/elastic"
 	"qfw/util/redis"
 
+	log "github.com/donnie4w/go-logger/logger"
 )
 
 func init() {
@@ -42,7 +42,7 @@ func init() {
 	//	log.Fatal("ElasticClient err:", err)
 	//} else {
 	//	util.ElasticClient = eClient
-		util.ElasticClientIndex = qu.ObjToString(util.Config["elasticsearch_index"])
+	util.ElasticClientIndex = qu.ObjToString(util.Config["elasticsearch_index"])
 	util.ElasticClientType = qu.ObjToString(util.Config["elasticsearch_type"])
 	util.ElasticClientDB = qu.ObjToString(util.Config["winner_enterprise"])
 	//}

+ 1 - 1
src/main_blocktest.go

@@ -51,7 +51,7 @@ func all() {
 }
 func one() {
 	m := mongodbutil.MgoFactory(3, 3, 120, "192.168.3.207:27081", "qfw")
-	d, _ := m.FindById("bidding", "59e47b5a40d2d9bbe82296bf", extract.Fields)
+	d, _ := m.FindById("bidding", "5e17deee50b5ea296ec939fb", extract.Fields)
 	com(*d)
 }
 func com(doc map[string]interface{}) {

+ 3 - 3
src/main_test.go

@@ -26,10 +26,10 @@ func Test_han(t *testing.T) {
 	os.Exit(0)
 }
 func Test_task(t *testing.T) {
-	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_kf")
+	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27092", "extract_dev32")
 	//extract.StartExtractTaskId("5b8f804025e29a290415aee1")5c528686698414055c47b115
-	//extract.StartExtractTestTask("5e103206234ddc34b406c5d1", "59e47b5a40d2d9bbe82296bf", "1", "result_mxs", "result_mxs")
-	extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5e17e00e85a9271abf0860a6", "1", "result_mxs", "result_mxs")
+	extract.StartExtractTestTask("5e103206234ddc34b406c5d1", "5e17deee50b5ea296ec939fb", "1", "mxs_v1", "mxs_v1")
+	//extract.StartExtractTestTask("5cdd3025698414032c8322b1", "5e17e00e85a9271abf0860a6", "1", "mxs_v1", "mxs_v1")
 	//extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
 	time.Sleep(5 * time.Second)
 }

+ 44 - 1
src/res/fieldscore.json

@@ -361,7 +361,50 @@
             }
         ]
     },
-    "winnerperson": {
+    "buyeraddr": {
+        "type": "string",
+        "negativewords": [
+            {
+                "describe": "出现符号",
+                "regstr": "[*]",
+                "score": -10
+            },
+			{
+                "describe": "是数字",
+                "regstr": "^\\d*[×―—-\\-]*[\u3000\u2003\u00a0\\s]*\\d*$",
+                "score": -10
+            },
+			{
+                "describe": "出现日期",
+                "regstr": "(\\d)+(年|月|日)+",
+                "score": -10
+            },
+			{
+                "describe": "包含负分",
+                "regstr": "(详见公告)",
+                "score": -10
+            }
+        ],
+        "length": [
+            {
+                "describe": "[gt,lte,score]",
+                "range": [
+                    0,
+                    6,
+                    -10
+                ]
+            },
+			 {
+                "describe": "[gt,∞,score]",
+                "range": [
+                    90,
+                    -1,
+                    -10
+                ]
+            }
+        ]
+    },
+	 "winnerperson": {
         "type": "string",
         "positivewords": [
             {