瀏覽代碼

抽取相关修改

zhengkun 4 年之前
父節點
當前提交
9fd48c8b91

+ 2 - 0
data_monitoring/listen_data/src/main.go

@@ -85,6 +85,8 @@ func main()  {
 	}else {
 		log.Println("正常监听...")
 	}
+
+	decodeJyUrl()
 	time.Sleep(99999*time.Hour)
 }
 

+ 39 - 0
data_monitoring/listen_data/src/zkmethod.go

@@ -0,0 +1,39 @@
+package main
+
+import (
+	"fmt"
+	log "github.com/donnie4w/go-logger/logger"
+	qu "qfw/util"
+	"qfw/util/elastic"
+	"qfw/util/redis"
+)
+
+// decodeJyUrl decodes a hard-coded sample article token with
+// qu.CommonDecodeArticle("content", ...) and logs the first decoded element
+// at debug level. It is a debugging helper with no parameters or results.
+func decodeJyUrl() {
+	sample := "ABCY1wJYzwOMyg4NHdxZ3IkCCQCIDFjcWhwPw4nLS4NYGpzcQFUCSs%3D"
+	decoded := qu.CommonDecodeArticle("content", sample)
+	// Indexing an empty result would panic, so bail out with a log line
+	// instead of taking the caller down.
+	if len(decoded) == 0 {
+		log.Debug("decodeJyUrl: empty decode result")
+		return
+	}
+	log.Debug(decoded[0])
+}
+// encodeJyUrl encodes a hard-coded sample article id with
+// qu.CommonEncodeArticle("content", ...), substitutes it into the jianyu360
+// article URL template and logs the resulting URL at debug level.
+func encodeJyUrl() {
+	encodedID := qu.CommonEncodeArticle("content", "60b9bf4a8a2adb30a5a25000")
+	articleURL := fmt.Sprintf("https://www.jianyu360.com/article/content/%s.html", encodedID)
+	log.Debug(articleURL)
+}
+
+// testMethod is a manual smoke test: it initialises the redis and elastic
+// clients with hard-coded addresses, encodes a sample article id into a
+// jianyu360 article URL, decodes a sample token, and logs both results at
+// debug level.
+func testMethod() {
+	// A recover()-based handler only takes effect when it runs as a deferred
+	// call; invoking it inline at the top of the function does nothing.
+	// NOTE(review): assumes qu.Catch wraps recover() — confirm against qfw/util.
+	defer qu.Catch()
+	log.Debug("处理数据...")
+
+	redis.InitRedis("test=127.0.0.1:6379")
+	elastic.InitElasticSize("http://ela.spdata.jianyu360.com", 10)
+
+	urlFormat := "https://www.jianyu360.com/article/content/%s.html"
+	encoded := fmt.Sprintf(urlFormat, qu.CommonEncodeArticle("content", "60b9bf4a8a2adb30a5a25000"))
+	decoded := qu.CommonDecodeArticle("content", "AKSY1wJZDweAj07GXd6Z3IzJicvASJgdnxxPygFLCFFcFVzeSNUCZ4%3D")
+	log.Debug(encoded)
+	// Indexing an empty decode result would panic; report it instead.
+	if len(decoded) == 0 {
+		log.Debug("testMethod: empty decode result")
+		return
+	}
+	log.Debug(decoded[0])
+}
+
+

+ 1 - 1
src/config.json

@@ -32,7 +32,7 @@
     "iscltlog": false,
     "brandgoods": false,
     "pricenumber":true,
-    "udptaskid": "607fb74f049a9923d8a4efd9",
+    "udptaskid": "60b493c2e138234cb4adb640",
     "udpport": "1484",
     "nextNode": [
         {

+ 1 - 1
src/jy/clear/tonumber.go

@@ -164,7 +164,7 @@ var moneyUnitRegBool = regexp.MustCompile(`(中标金额|成交金额|合同金
 
 //数字金额转换
 func numMoney(data []interface{}) ([]interface{}, bool) {
-	tmp := fmt.Sprint(data[0])
+	tmp := fmt.Sprintf("%f",data[0])
 	//费率转换% ‰
 	flv := float64(1)
 	if strings.HasSuffix(tmp, "%") {

+ 33 - 1
src/jy/extract/extpackage.go

@@ -27,7 +27,7 @@ func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *Extr
 func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{}, isSite bool, codeSite string, isclearMoney string) {
 	if p != nil {
 		for pk, pv2 := range p.KvTags {
-			if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额") {
+			if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额"||pk == "招标代理费(中标服务费)") {
 				tmp := []*ju.Tag{}
 				var tmpindex, tmpweight int = -9999, -9999
 				for ii, vv := range pv2 {
@@ -74,6 +74,17 @@ func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{},
 					}
 					continue
 				}
+				if qu.Float64All((*sonJobResult)["agencyfee"]) == 0 && tags[0].Key == "招标代理费(中标服务费)" {
+					lock.Lock()
+					cfn := e.ClearFn["agencyfee"]
+					lock.Unlock()
+					data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""}, codeSite, isclearMoney)
+					if istrue, ok := data[len(data)-1].(bool); istrue && ok {
+						(*sonJobResult)["agencyfee"] = data[0]
+					}
+					continue
+				}
+
 				if ((*sonJobResult)["winner"] == nil || (*sonJobResult)["winner"] == "") && tags[0].Key == "中标单位" {
 					if winnerorderNotReg.MatchString(pv[0].Value) {
 						continue
@@ -121,6 +132,17 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 						j.BlockPackage[kk] = vv
 					}
 				}
+				if vv.Agencyfee > 0 {
+					lock.Lock()
+					cfn := e.ClearFn["agencyfee"]
+					lock.Unlock()
+					data := clear.DoClearFn(cfn, []interface{}{vv.Agencyfee, ""}, codeSite, j.IsClearnMoney)
+					if istrue, ok := data[len(data)-1].(bool); istrue && ok {
+						vv.Agencyfee = qu.Float64All(data[0])
+						j.BlockPackage[kk] = vv
+					}
+				}
+
 			}
 			for _, ev := range e.PkgRuleCores {
 				for _, eve := range ev.RuleCores {
@@ -183,6 +205,9 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 					if pkg.IsTrueBudget {
 						sonJobResult["budget"] = pkg.Budget
 					}
+					if pkg.IsTrueAgencyfee {
+						sonJobResult["agencyfee"] = pkg.Agencyfee
+					}
 					if pkg.IsTrueBidamount {
 						sonJobResult["bidamount"] = pkg.Bidamount
 					}
@@ -230,6 +255,13 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 								kvparse(bv.SpaceKV, e, &sonJobResult, isSite, codeSite, j.IsClearnMoney)
 							}
 						}
+						if qu.Float64All(sonJobResult["agencyfee"]) == 0 && pkg.IsTrueAgencyfee {
+							for _, bv := range j.Block {
+								kvparse(bv.ColonKV, e, &sonJobResult, isSite, codeSite, j.IsClearnMoney)
+								kvparse(bv.TableKV, e, &sonJobResult, isSite, codeSite, j.IsClearnMoney)
+								kvparse(bv.SpaceKV, e, &sonJobResult, isSite, codeSite, j.IsClearnMoney)
+							}
+						}
 					}
 					if sonJobResult["name"] == nil || sonJobResult["name"] == "" {
 						sonJobResult["name"] = j.Title

+ 38 - 3
src/jy/extract/extract.go

@@ -1097,6 +1097,9 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 				if in.Field == "budget" && vbpkg.Budget > 0 {
 					continue
 				}
+				if in.Field == "agencyfee" && vbpkg.Agencyfee > 0 {
+					continue
+				}
 				if in.Field == "bidamount" && vbpkg.Bidamount > 0 {
 					continue
 				}
@@ -1191,7 +1194,17 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
 									j.BlockPackage[k].IsTrueBudget = true
 								}
 								break
-							} else if in.Field == "bidamount" && vbpkg.Bidamount <= 0 {
+							} else if in.Field == "agencyfee" && vbpkg.Agencyfee <= 0 {
+								lock.Lock()
+								cfn := e.ClearFn[in.Field]
+								lock.Unlock()
+								data := clear.DoClearFn(cfn, []interface{}{strings.TrimSpace(rep[in.Field+"_"+fmt.Sprint(i)]), j.Content}, j.SpiderCode, j.IsClearnMoney)
+								if data[len(data)-1].(bool) {
+									j.BlockPackage[k].Agencyfee = qu.Float64All(data[0])
+									j.BlockPackage[k].IsTrueAgencyfee = true
+								}
+								break
+							}else if in.Field == "bidamount" && vbpkg.Bidamount <= 0 {
 								lock.Lock()
 								cfn := e.ClearFn[in.Field]
 								lock.Unlock()
@@ -1936,7 +1949,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					tmp[v.Field] = v.Value
 					break
 				}
-				if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget") && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
+				if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget" ) && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
 					tmp[v.Field] = v.Value
 					break
 				}
@@ -1953,7 +1966,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		if len(j.PackageInfo) > 0 { //分包信息
 			tmp["package"] = j.PackageInfo
 			//包预算,中标金额合并大于抽取就覆盖
-			var tmpBidamount, tmpBudget float64
+			var tmpBidamount, tmpBudget,tmpAgencyfee float64
 			//s_winner逗号分隔拼接,分包中标人
 			var tmpstr, savewinner []string
 			//按包排序
@@ -1972,10 +1985,18 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 					if v["bidamount"] != nil {
 						tmpBidamount += qu.Float64All(v["bidamount"])
 					}
+					if v["agencyfee"] != nil {
+						tmpAgencyfee += qu.Float64All(v["agencyfee"])
+					}
 				}
 				if qu.Float64All(tmp["budget"]) < tmpBudget {
 					tmp["budget"] = tmpBudget
 				}
+				if qu.Float64All(tmp["agencyfee"]) < tmpAgencyfee {
+					tmp["agencyfee"] = tmpAgencyfee
+				}
+
+
 				if qu.Float64All(tmp["bidamount"]) > 0 && qu.Float64All(tmp["budget"]) > 0 && (qu.Float64All(tmp["bidamount"])/10 > qu.Float64All(tmp["budget"])) {
 					tmp["bidamount"] = tmpBidamount
 				} else if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
@@ -1989,8 +2010,16 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 							tmp["budget"] = v["budget"]
 						}
 					}
+				}
 
+				if tmp["agencyfee"] == nil || tmp["agencyfee"] == 0 {
+					for _, v := range j.PackageInfo {
+						if v["agencyfee"] != nil {
+							tmp["agencyfee"] = v["agencyfee"]
+						}
+					}
 				}
+
 				if tmp["bidamount"] == nil || tmp["bidamount"] == 0 {
 					for _, v := range j.PackageInfo {
 						if v["bidamount"] != nil {
@@ -2226,6 +2255,8 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			}
 			tmp["result"] = result
 			//tmp["resultf"] = resultf
+			//_,err :=db.Mgo.Get().DB("zhengkun").C("result_data").Upsert(`{"_id":"`+_id+`"}`,map[string]interface{}{"$set": tmp})
+			//log.Debug("save:",err)
 			b := db.Mgo.Update(e.TaskInfo.TestColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false)
 			if !b {
 				log.Debug(e.TaskInfo.TestColl, _id)
@@ -2309,6 +2340,10 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
 			delete(tmp, "bidamount")
 		}*/
 	}
+	//投标方式
+	if bidmode, ok := tmp["bidmode"].(int); !ok || bidmode==0 {
+		delete(tmp, "bidmode")
+	}
 	return tmp
 }
 

+ 1 - 1
src/jy/extract/score.go

@@ -145,7 +145,7 @@ func ScoreFields(j *ju.Job, ftag map[string][]*Tag) map[string][]*ju.ExtField {
 
 			//没有抽取到值,不打分
 			if string_value := fmt.Sprint(tmpsvalue.Value); string_value == "" || string_value == "0" || string_value == "<nil>" {
-				if field == "budget" || field == "bidamount" {
+				if field == "budget" || field == "bidamount"{
 					if tmpsvalue.IsTrue {
 						//continue
 					} else {

+ 3 - 2
src/jy/pretreated/analystep.go

@@ -19,7 +19,7 @@ var hisReg = regexp.MustCompile("(开标记录|类似业绩|历史业绩|填报
 var hisReg2 = regexp.MustCompile("(开标记录|业绩|[得评]+[审打]{0,2}分情况|无效标)[::\n]*.*?[\n]?(</tr>|</table>|</td>)")
 var formattext = regexp.MustCompile("(投标总价)([0-9,.万元]*)")
 var formattext2 = regexp.MustCompile("中标单价.*(中标总价.*)")
-var formattext3 = regexp.MustCompile("(同类项目业绩、)")
+// formattext3 matches, inside a single capture group, either the literal
+// "同类项目业绩、" or a digit 1-9, any one character (other than newline), a
+// digit 0-9 and then the literal "包段划分".
+// NOTE(review): the "." between the two digit classes is unescaped, so it
+// matches any character rather than only a literal dot — confirm intent.
+var formattext3 = regexp.MustCompile("(同类项目业绩、|[1-9].[0-9]包段划分)")
 
 func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 	con := job.Content
@@ -31,6 +31,7 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 	con = hisReg2.ReplaceAllString(con, "${2}")
 	con = formattext.ReplaceAllString(con, "${1}:${2}")
 	con = formattext2.ReplaceAllString(con, "${1}")
+
 	con = formatText(con, "all")
 	job.Content = con
 	//计算表格占比,返回表格数组、占比
@@ -123,7 +124,7 @@ func AnalyStart(job *util.Job, isSite bool, codeSite string) {
 
 func processTableInBlock(bl *util.Block, job *util.Job, isSite bool, codeSite string) {
 	//块中再查找表格(块,处理完把值赋到块)
-	bl.Text = formatText(bl.Text, "biangeng")
+	//bl.Text = formatText(bl.Text, "biangeng")
 	tabs, _ := ComputeConRatio(bl.Text, 2)
 	for i, tab := range tabs {
 		job.HasTable = 1

+ 1 - 1
src/jy/pretreated/analytable.go

@@ -916,7 +916,7 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat, isSite bool, codeSi
 				//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
 				table.KVFilter(isSite, codeSite)
 			}
-			//对有表头表格的处理
+			//对有表头表格的处理
 			if table.Tag != "" {
 				co, m, b := CheckMultiPackage(table.Tag, "") //分包处理
 				if b {

+ 2 - 0
src/jy/pretreated/colonkv.go

@@ -227,7 +227,9 @@ func formatText(content, key string) string {
 		}
 		newCon += v.Text + "\n"
 	}
+
 	content = regEndWrap.ReplaceAllString(newCon, "")
+
 	//if key == "kv"{
 	//	log.Println("清理前后\n",content)
 	//}

+ 2 - 0
src/jy/util/article.go

@@ -140,6 +140,8 @@ type BlockPackage struct {
 	WinnerTel       string                   //中标单位联系电话
 	WinnerPerson    string                   //中标联系人
 	Bidamount       float64                  //标段(包)中标价
+	Agencyfee       float64                  //包服务费
+	IsTrueAgencyfee bool                     //
 	IsTrueBidamount bool                     //标段(包)中标价 0是否有效
 	Index           string                   //序号 (转换后编号,只有数字或字母)
 	Type            string                   //类型 (匹配后面的标段、包之类的词)

+ 5 - 1
src/main.go

@@ -70,15 +70,19 @@ func init() {
 }
 
 func main() {
-
 	extract.ExtractUdp() //udp通知抽取
 	//extract.ClearUdp()   //udp通知清理
 	go extract.Export()
 	go Router.Run(":" + qu.ObjToString(util.Config["port"]))
 	go log.Debug("启动..", qu.ObjToString(util.Config["port"]))
+
 	go func() {
 		http.ListenAndServe("localhost:10000", nil)
 	}()
+
+
+
+
 	lock := make(chan bool)
 	<-lock
 

+ 2 - 2
udps/main.go

@@ -22,8 +22,8 @@ func main() {
 	flag.IntVar(&p, "p", 1484, "端口")
 	flag.IntVar(&tmptime, "tmptime", 0, "时间查询")
 	flag.StringVar(&tmpkey, "tmpkey", "", "时间字段")
-	flag.StringVar(&id1, "gtid", "6075ea3162ad7d3e568c7590", "gtid")
-	flag.StringVar(&id2, "lteid", "6076546d27cdc4cf2bc60fde", "lteid")
+	flag.StringVar(&id1, "gtid", "1fffffffffffffffffffffff", "gtid")
+	flag.StringVar(&id2, "lteid", "9fffffffffffffffffffffff", "lteid")
 	flag.StringVar(&ids, "ids", "", "id1,id2")
 	flag.StringVar(&stype, "stype", "biddingall", "stype,传递类型")
 	flag.StringVar(&bkey, "bkey", "", "bkey,加上此参数表示不生关键词和摘要")