zhangjinkun 6 years ago
parent
commit
cf5bf6bbfe
3 changed files with 23 additions and 13 deletions
  1. 1 1
      src/jy/cluster/aliecs.go
  2. 5 0
      src/jy/extract/extract.go
  3. 17 12
      src/res/fieldscore.json

+ 1 - 1
src/jy/cluster/aliecs.go

@@ -41,7 +41,7 @@ func RunInstances(TaskName string, num, hours int) {
 				//[]string{"SecurityGroupId", "sg-bp16x3td2evrejhkshp7"},
 				[]string{"VSwitchId", qu.ObjToString(esconfig["VSwitchId"])},
 				[]string{"InternetMaxBandwidthIn", "50"},
-				[]string{"InternetMaxBandwidthOut", "25"},
+				[]string{"InternetMaxBandwidthOut", "0"},
 				[]string{"InstanceChargeType", "PostPaid"},
 				[]string{"SpotStrategy", "SpotWithPriceLimit"},
 				[]string{"SpotPriceLimit", "1.99"},

+ 5 - 0
src/jy/extract/extract.go

@@ -335,6 +335,11 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
 
 //抽取-规则
 func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) {
+	//废标、流标、ppp等跳过
+	b := IsExtract(in.Field, j.Title, j.Content)
+	if !b {
+		return
+	}
 	if in.IsLua {
 		lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
 		lua.KvMap = getKvByLuaFields(extfrom, j, in, et.Tag)

+ 17 - 12
src/res/fieldscore.json

@@ -40,20 +40,20 @@
         "position": [
             {
                 "describe": "以*结尾",
-                "regstr": ".{2,100}(处|委员会|办公室|局|中心|协会|公司|政府|大学|学校|医院|集团|银行)$",
-                "score": 3
+                "regstr": ".{2,100}(委员会|办公室|幼儿园|动物园|图书馆|殡仪馆|博物馆|基地|青年宫|少年宫|艺术宫|电视台|中心|协会|公司|政府|初中|集团|银行|[大中小]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场)$",
+                "score": 5
             },
             {
-                "describe": "以*结尾",
-                "regstr": "交易中心",
-                "score": -3
+                "describe": "包含负分",
+                "regstr": "(附件|招标失败|交易中心|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|：|;|；|\\?|？|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
+                "score": -20
             }
         ],
         "length": [
             {
                 "describe": "长度打分min>val:-6,min<=val<=max:1,max<val:-1",
-                "min": 4,
-                "max": 15,
+                "min": 5,
+                "max": 20,
                 "score": [
                     -6,
                     1,
@@ -67,15 +67,20 @@
         "position": [
             {
                 "describe": "以*结尾",
-                "regstr": ".{2,100}(公司|合作社)$",
-                "score": 3
+                "regstr": ".{2,100}(集团|公司|学校|中心|家具城|门诊|[大中小]学|部|院|局|厂|店|所|队|社|室|厅|段|会|场|行)$",
+                "score": 5
+            },
+            {
+                "describe": "包含负分",
+                "regstr": "(附件|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\\d[\\s]{0,10}(\\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|：|;|；|\\?|？|¥|\\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})",
+                "score": -20
             }
         ],
         "length": [
             {
                 "describe": "长度打分min>val:-6,min<=val<=max:1,max<val:-1",
-                "min": 4,
-                "max": 15,
+                "min": 5,
+                "max": 20,
                 "score": [
                     -6,
                     1,
@@ -102,7 +107,7 @@
             {
                 "describe": "长度打分min>val:-6,min<=val<=max:1,max<val:-1",
                 "min": 4,
-                "max": 15,
+                "max": 30,
                 "score": [
                     -6,
                     1,