wangchuanjin 7 vuotta sitten
vanhempi
commit
59fde941b0

+ 4 - 3
jyinfomatch/src/config.json

@@ -5,11 +5,12 @@
     "mgoSize": 50,
     "mgoAddr": "192.168.3.18:27080",
 	"mongodbName": "jyqyfw",
-	"collection": "usermail",
+	"collection": "usermail_1",
     "elasticPoolSize": 10,
-    "elasticsearch": "http://192.168.3.18:9800",
+    "elasticsearch": "http://192.168.3.14:9800",
+	"redisServers":"filter=192.168.3.14:2379",
     "maxSearch": 10000,
 	"saveSize": 100,
 	"poolSize": 100,
-	"fields":["_id","title","detail","projectscope","publishtime","toptype","subtype","type","area","href","projectname","projectcode","s_winner","buyer","budget","bidamount","bidopentime","s_subscopeclass","bidstatus","agency","projectinfo"]
+	"fields":["_id","title","detail","projectscope","publishtime","toptype","subtype","type","area","href","projectname","projectcode","s_winner","buyer","budget","bidamount","bidopentime","s_subscopeclass","bidstatus","agency","projectinfo","buyerperson","buyertel","city"]
 }

+ 0 - 0
jyinfomatch/src/luascript/dev.lua → jyinfomatch/src/luascript/dev.lua.bak20171205


+ 102 - 0
jyinfomatch/src/luascript/jyMi1XQgMABQNcSkBMIhBq_1.lua

@@ -0,0 +1,102 @@
+--Unique identifier of the user this script belongs to
+appid="jyMi1XQgMABQNcSkBMIhBq"
+--Score given to records matched by this script
+matchscore=10
+--Label attached to matched records (the value means "hardware")
+department="硬件"
+--Match words for the title, split into groups.
+--Group i is paired by index with the label title_keys_1[i] (see matchTitle).
+title_keys={
+	{"政务云","不动产登记","涉密网","机要网","政法网","电子政务","警务云"},
+	{"三通两平台","云学堂","云课堂","云教室","薄改","改薄","校园网","教育云"},
+	{"卫生云","卫生信息平台","医疗卫生机构管理信息系统","医疗机构管理信息系统","工业4.0","智能制造","全民健康","中医馆","MES","HIS","HANA","高性能计算"},
+	{"交通一卡通","两网融合","智慧交通","智慧高速","智慧公交"},
+	{"交换机","路由器","防火墙","网络安全","负载均衡","服务器","存储","无线网","大数据","云计算","云平台","等保","等级保护"},
+	{"信息化建设","数据中心","网络建设","网络改造","网络设备"}
+}
+--One label per group of title_keys, in the same order
+title_keys_1={"政府","教育","企业","公共事业","产品维度","其他维度"}
+--Match words for the body text (see matchDetail)
+detail_keys={"核心交换机","汇聚交换机","核心路由器","高性能计算","防火墙","四路服务器","双路服务器","数据中心"}
+--Exclusion words: a title containing any of these is excluded (see excludeTitle)
+title_notkey1={"监理","设计","施工","装修","维修","维护","维保","运维","打印","扫描","投影","数据整合","缆","土建","空调","电脑","服装","作废"}
+--Conditional exclusion words: entry i excludes the title unless a word of title_notkey2_2[i] is also present
+title_notkey2={"软件","办公","服务"}
+--Keep-words, paired by index with title_notkey2
+title_notkey2_2={{"数据中心","平台","网络"},{"办公网"},{"服务器","硬件集成","云平台建设","网络"}}
+--Script entry point: decides whether a record matches this script.
+--The title is matched first; a matched title is rejected when excludeTitle
+--reports an exclusion. The body text is only consulted when the title did not match.
+--@param data table record; its "title" and "detail" fields are read
+--@return data (unchanged) when the record matches, nil otherwise
+function filterValidate(data)
+	--matched parent label, matched word(s), whether anything matched
+	local y_p,y_k,y_ok = "","",false
+	--Title handling. The nil/empty check must come BEFORE string.upper:
+	--string.upper(nil) raises an error, so a record without a title used to abort the script.
+	local title = data["title"]
+	if title ~= nil and title ~= "" then
+		title = string.upper(title)
+		--title match
+		y_p,y_k,y_ok = matchTitle(title)
+		--print("title包含--",y_p,y_k,y_ok)
+		if y_ok then
+			--excluded word, keep-word that rescued it, whether the title is excluded
+			local n_p,n_k,n_ok = excludeTitle(title)
+			if n_ok then
+				--print("title排除--",n_p,n_k,n_ok)
+				return nil
+			end
+		end
+	end
+	--Body handling: only needed when the title did not match
+	if not y_ok then
+		local detail = data["detail"]
+		--same nil-before-upper rule as for the title
+		if detail ~= nil and detail ~= "" then
+			y_p,y_k,y_ok = matchDetail(string.upper(detail))
+			--print("detail包含--",y_p,y_k,y_ok)
+		end
+	end
+	--nothing matched
+	if not y_ok then
+		return nil
+	end
+	return data
+end
+--Title matching: returns the label of the first group of title_keys that has a hit.
+--Groups are walked with ipairs so they are tried in definition order; pairs gives
+--no ordering guarantee, and the first hit decides which label is returned.
+--@param value string upper-cased title
+--@return label from title_keys_1, second result of contain, true; or "", "", false when no group matches
+function matchTitle(value)
+	for k,keys in ipairs(title_keys) do
+		--contain is provided outside this script; presumably it returns (found, matched word(s)) - TODO confirm
+		local ok,matchkeys = contain(value,keys)
+		if ok then
+			return title_keys_1[k],matchkeys,true
+		end
+	end
+	return "","",false
+end
+--Title exclusion.
+--1. Any word of title_notkey1 in the title excludes it.
+--2. Otherwise the first word of title_notkey2 found in the title excludes it,
+--   unless a keep-word from the index-paired list title_notkey2_2[k] is also present.
+--ipairs is used because title_notkey2 and title_notkey2_2 are paired by position and
+--the first hit decides the result; pairs gives no ordering guarantee.
+--NOTE(review): once a keep-word rescues the first hit, the remaining title_notkey2
+--words are not examined - confirm this is intended.
+--@param value string upper-cased title
+--@return excluded word, keep-word result ("" when none), true when the title must be excluded
+function excludeTitle(value)
+	local ok,matchkeys = contain(value,title_notkey1)
+	if ok then
+		return matchkeys,"",true
+	end
+	for k,key in ipairs(title_notkey2) do
+		local ok_1,matchkeys_1 = contain(value,{key})
+		if ok_1 then
+			local ok_2,matchkeys_2 = contain(value,title_notkey2_2[k])
+			if ok_2 then
+				--exclusion word present but rescued by a keep-word
+				return key,matchkeys_2,false
+			end
+			return key,"",true
+		end
+	end
+	return "","",false
+end
+--Body matching: checks the body text against detail_keys.
+--@param value string upper-cased body text
+--@return "", second result of contain, true on a hit; "", "", false otherwise
+function matchDetail(value)
+	local hit,words = contain(value,detail_keys)
+	if not hit then
+		return "","",false
+	end
+	return "",words,true
+end

+ 52 - 0
jyinfomatch/src/luascript/jyMi1XQgMABQNcSkBMIhBq_2.lua

@@ -0,0 +1,52 @@
+--Unique identifier of the user this script belongs to
+appid="jyMi1XQgMABQNcSkBMIhBq"
+--Score given to records matched by this script
+matchscore=5
+--Label attached to matched records (the value means "software")
+department="软件"
+--Match words for the title (see matchTitle)
+title_keys={"政务内网","政务外网","党务网","政务网","机房改造","安全审计","卫生信息化","医疗信息平台","教育资源公共服务平台","智慧城市","智慧医疗","智慧教育"}
+--Script entry point: decides whether a record matches this script.
+--A record matches when matchOther accepts it (budget/industry rule) or when its
+--upper-cased title contains one of title_keys.
+--@param data table record; "title" is read here, "budget"/"s_subscopeclass" by matchOther
+--@return data (unchanged) when the record matches, nil otherwise
+function filterValidate(data)
+	--budget and industry rule
+	if matchOther(data) then
+		--print("预算大于1000万的信息化项目")
+		return data
+	end
+	--Title handling. The nil/empty check must come BEFORE string.upper:
+	--string.upper(nil) raises an error, so a record without a title used to abort the script.
+	local title = data["title"]
+	if title == nil or title == "" then
+		return nil
+	end
+	--matched parent label, matched word(s), whether anything matched
+	local y_p,y_k,y_ok = matchTitle(string.upper(title))
+	--print("title包含--",y_p,y_k,y_ok)
+	if not y_ok then
+		return nil
+	end
+	return data
+end
+--Title matching: checks the title against title_keys.
+--@param value string upper-cased title
+--@return "", second result of contain, true on a hit; "", "", false otherwise
+function matchTitle(value)
+	local hit,words = contain(value,title_keys)
+	if not hit then
+		return "","",false
+	end
+	return "",words,true
+end
+--Reports whether the record is an information-technology project whose budget
+--exceeds 10,000,000.
+--A record qualifies when "s_subscopeclass" starts with "信息技术_" or contains
+--",信息技术_" (i.e. any entry of the comma-separated list has that prefix).
+--Values of an unexpected type yield false instead of raising: comparing a string
+--budget with a number, or calling string.find on a non-string, would abort the script.
+--@param data table record; "budget" and "s_subscopeclass" are read
+--@return boolean
+function matchOther(data)
+	local budget = data["budget"]
+	local subscopeclass = data["s_subscopeclass"]
+	if budget == nil or subscopeclass == nil then
+		return false
+	end
+	--accept numbers and numeric strings; anything else becomes nil
+	budget = tonumber(budget)
+	if budget == nil or type(subscopeclass) ~= "string" then
+		return false
+	end
+	if budget <= 10000000 then
+		return false
+	end
+	return string.find(subscopeclass,"^信息技术_") ~= nil or string.find(subscopeclass,",信息技术_") ~= nil
+end

+ 2 - 0
jyinfomatch/src/main.go

@@ -5,6 +5,7 @@ import (
 	"qfw/util"
 	"qfw/util/elastic"
 	"qfw/util/mongodb"
+	"qfw/util/redis"
 	"qyfw"
 	"strings"
 )
@@ -15,6 +16,7 @@ func init() {
 	qyfw.MaxSearch = util.IntAllDef(qyfw.SysConfig["maxSearch"], 10000)
 	mongodb.InitMongodbPool(util.IntAll(qyfw.SysConfig["mgoSize"]), qyfw.SysConfig["mgoAddr"].(string), qyfw.SysConfig["mongodbName"].(string))
 	elastic.InitElasticSize(qyfw.SysConfig["elasticsearch"].(string), util.IntAllDef(qyfw.SysConfig["elasticPoolSize"], 20))
+	redis.InitRedis(qyfw.SysConfig["redisServers"].(string))
 	qyfw.Collection = qyfw.SysConfig["collection"].(string)
 	qyfw.SaveSize = util.IntAllDef(qyfw.SysConfig["saveSize"], 200)
 	qyfw.PoolSize = util.IntAllDef(qyfw.SysConfig["poolSize"], 100)

+ 3 - 5
jyinfomatch/src/mainTest.go

@@ -10,7 +10,7 @@ import (
 	"qyfw"
 )
 
-func main_1() {
+func main_d() {
 	modle := flag.Int("m", 0, "模式")
 	id := flag.String("id", "", "信息id")
 	collection := flag.String("c", "", "表名")
@@ -29,12 +29,10 @@ func main_1() {
 		*id = qutil.CommonDecodeArticle("content", *id)[0]
 		log.Println("解密后id是", *id)
 	}
-	mongodb.InitMongodbPool(1, "192.168.3.14:27080", "jyqyfw")
+	mongodb.InitMongodbPool(1, "192.168.3.18:27080", "jyqyfw")
 	list := []map[string]interface{}{}
 	log.Println(*id, *collection)
-	data := mongodb.FindOne(*collection, map[string]interface{}{
-		"id": *id,
-	})
+	data := mongodb.FindById(*collection, *id, nil)
 	if data == nil || len(*data) == 0 {
 		log.Println("没有找到数据!")
 		return

+ 9 - 6
jyinfomatch/src/qyfw/handler.go

@@ -47,20 +47,23 @@ func runJob() {
 //初始化脚本
 func NewLuaScript(name, luafile string) *Job {
 	defer util.Catch()
-	job := &Job{}
-	job.Results = &[]map[string]interface{}{}
-	job.Name = name
-	job.ScriptFile = luafile
-	job.EachListPool = make(chan bool, PoolSize)
 	//只是单纯的验证一下,lua脚本是否有问题
 	script := &Script{}
 	if !script.LoadScript(name, luafile, true) {
 		return nil
 	}
-	job.Appid = script.GetVar("appid")
+	job := &Job{}
+	job.Appid = script.GetStringVar("appid")
 	if job.Appid == "" {
 		log.Println("error:从脚本", name, "中获取到Appid为空!")
 		return nil
 	}
+	job.Name = name
+	job.Results = &[]map[string]interface{}{}
+	job.ScriptFile = luafile
+	job.EachListPool = make(chan bool, PoolSize)
+	job.MatchScore = script.GetFloatVar("matchscore")
+	job.Department = script.GetStringVar("department")
+	job.RedisFiveData = map[string]*[]*map[string]interface{}{}
 	return job
 }

+ 166 - 17
jyinfomatch/src/qyfw/job.go

@@ -4,9 +4,14 @@
 package qyfw
 
 import (
+	"encoding/json"
+	"fmt"
 	"log"
+	"math"
 	qutil "qfw/util"
 	"qfw/util/mongodb"
+	"qfw/util/redis"
+	"strings"
 	"sync"
 	"time"
 	"util"
@@ -22,17 +27,22 @@ var (
 )
 
 type Job struct {
-	Name         string                    //脚本名称
-	Appid        string                    //用户唯一标识
-	Results      *[]map[string]interface{} //最终要存库的数据
-	Lock         sync.Mutex
-	WaitGroup    sync.WaitGroup
-	ScriptFile   string
-	EachListPool chan bool
+	Name          string                    //脚本名称
+	Appid         string                    //用户唯一标识
+	MatchScore    float64                   //打分
+	Department    string                    //标签
+	Results       *[]map[string]interface{} //最终要存库的数据
+	Lock          sync.Mutex
+	WaitGroup     sync.WaitGroup
+	ScriptFile    string
+	EachListPool  chan bool
+	RedisFiveData map[string]*[]*map[string]interface{}
+	FilterCount   int
 }
 
 //任务
 func (j *Job) Start(list *[]map[string]interface{}) {
+	defer qutil.Catch()
 	count := 0
 	for _, v := range *list {
 		j.EachListPool <- true
@@ -50,7 +60,7 @@ func (j *Job) Start(list *[]map[string]interface{}) {
 			result := j.ExecJob(script, &info)
 			//保存
 			if result != nil && len(*result) > 0 && IsSave {
-				j.Save(result, j.Appid, false)
+				j.Save(result, false)
 			}
 		}(v)
 		if count%200 == 0 {
@@ -59,7 +69,8 @@ func (j *Job) Start(list *[]map[string]interface{}) {
 		count++
 	}
 	j.WaitGroup.Wait()
-	j.Save(nil, j.Appid, true)
+	j.Save(nil, true)
+	j.UpdateRedis()
 	log.Println("脚本", j.Name, "执行完毕!")
 }
 
@@ -100,27 +111,165 @@ func (j *Job) ExecJob(script *Script, info *map[string]interface{}) *map[string]
 }
 
 //保存到mongodb
-func (j *Job) Save(result *map[string]interface{}, appid string, flag bool) {
+func (j *Job) Save(result *map[string]interface{}, flag bool) {
 	j.Lock.Lock()
 	defer j.Lock.Unlock()
 	if result != nil {
 		(*result)["createtime"] = time.Now().Unix()
-		(*result)["appid"] = appid
 		(*result)["id"] = (*result)["_id"]
+		(*result)["appid"] = j.Appid
+		(*result)["department"] = j.Department
+		(*result)["matchscore"] = j.MatchScore
 		delete(*result, "_id")
-		*j.Results = append(*j.Results, *result)
+		if j.Filter(*result) {
+			j.FilterCount++
+		} else {
+			*j.Results = append(*j.Results, *result)
+		}
 	}
 	length := len(*j.Results)
 	if length == 0 {
+		if flag {
+			log.Println(j.Name, "save", 0, "filter", j.FilterCount)
+			j.FilterCount = 0
+		}
 		return
 	}
 	if length >= SaveSize || flag {
-		thisSaveSize := SaveSize
-		if flag {
-			thisSaveSize = len(*j.Results)
-		}
-		log.Println(appid, "save", thisSaveSize)
+		log.Println(j.Name, "save", length, "filter", j.FilterCount)
 		mongodb.SaveBulk(Collection, *j.Results...)
 		j.Results = &[]map[string]interface{}{}
+		j.FilterCount = 0
+	}
+}
+
+// Filter reports whether result duplicates a record already seen for the same
+// appid/department/toptype within the last five days.
+//
+// Candidates are kept in j.RedisFiveData, keyed by
+// "appid_department_toptype_yyyyMMdd_area". A bucket that is not cached yet is
+// loaded once from the "filter" redis and decoded into maps. When result is not
+// a duplicate, a summary of it (title, buyer, projectname, projectcode, city,
+// publishtime) is appended to today's bucket for its area and false is returned.
+//
+// The method reads and mutates j.RedisFiveData without locking of its own.
+func (j *Job) Filter(result map[string]interface{}) bool {
+	area := qutil.ObjToString(result["area"])
+	// Area code "A" is treated as nationwide ("全国").
+	if area == "A" {
+		area = "全国"
+	}
+	publishtime := qutil.Int64All(result["publishtime"])
+	toptype := qutil.ObjToString(result["toptype"])
+	title := qutil.ObjToString(result["title"])
+	buyer := qutil.ObjToString(result["buyer"])
+	projectname := qutil.ObjToString(result["projectname"])
+	projectcode := qutil.ObjToString(result["projectcode"])
+	city := qutil.ObjToString(result["city"])
+	now := time.Now()
+	nowDate := qutil.FormatDate(&now, qutil.Date_yyyyMMdd)
+	// Bucket that receives this record when it turns out not to be a duplicate.
+	redisKey := fmt.Sprintf("%s_%s_%s_%s_%s", j.Appid, j.Department, toptype, nowDate, area)
+	fiveDay := util.GetLatelyFiveDay(5)
+	isRepeat := false
+L:
+	for _, v := range fiveDay {
+		keys := []string{fmt.Sprintf("%s_%s_%s_%s_%s", j.Appid, j.Department, toptype, v, area)}
+		// When the area is not nationwide, also compare against the nationwide bucket.
+		if area != "全国" {
+			keys = append(keys, fmt.Sprintf("%s_%s_%s_%s_%s", j.Appid, j.Department, toptype, v, "全国"))
+		}
+		for _, key := range keys {
+			fiveData := j.RedisFiveData[key]
+			if fiveData == nil {
+				// First access of this bucket: cache an (initially empty) slice so redis
+				// is queried only once per key, then fill it from redis.
+				fiveData = &[]*map[string]interface{}{}
+				j.RedisFiveData[key] = fiveData
+				redisDatas, _ := redis.Get("filter", key).([]interface{})
+				for _, rsd := range redisDatas {
+					// Round-trip each element through JSON to obtain a map[string]interface{}.
+					var rddm map[string]interface{}
+					rsdByte, err := json.Marshal(rsd)
+					if err != nil {
+						log.Println(err)
+						continue
+					}
+					if err := json.Unmarshal(rsdByte, &rddm); err != nil {
+						log.Println(err)
+						continue
+					}
+					if rddm != nil && len(rddm) > 0 {
+						*fiveData = append(*fiveData, &rddm)
+					}
+				}
+			}
+			for _, data := range *fiveData {
+				// Any two of buyer/projectname/projectcode being equal counts as a duplicate.
+				// NOTE(review): plain == is used, so two empty values also compare equal
+				// (e.g. both records lacking buyer and projectcode) - confirm this is intended.
+				r_publishtime := qutil.Int64All((*data)["publishtime"])
+				r_buyer := qutil.ObjToString((*data)["buyer"])
+				r_projectname := qutil.ObjToString((*data)["projectname"])
+				r_projectcode := qutil.ObjToString((*data)["projectcode"])
+				// Only compare records whose publish times are at most 5 days (432000 s) apart.
+				if math.Abs(qutil.Float64All(publishtime-r_publishtime)) > 432000 {
+					continue
+				}
+				if buyer == r_buyer && (projectname == r_projectname || projectcode == r_projectcode) {
+					isRepeat = true
+					break L
+				}
+				if projectname == r_projectname && (buyer == r_buyer || projectcode == r_projectcode) {
+					isRepeat = true
+					break L
+				}
+				if projectcode == r_projectcode && (buyer == r_buyer || projectname == r_projectname) {
+					isRepeat = true
+					break L
+				}
+				// Equal titles longer than 10 runes count as a duplicate.
+				r_title := qutil.ObjToString((*data)["title"])
+				if len([]rune(title)) > 10 && title == r_title {
+					isRepeat = true
+					break L
+				}
+				// Titles longer than 10 runes where one contains the other count as a duplicate
+				// when buyer/projectname/projectcode/city also agree; for the nationwide area
+				// the containment alone is enough.
+				r_city := qutil.ObjToString((*data)["city"])
+				if len([]rune(title)) > 10 && len([]rune(r_title)) > 10 && (strings.Contains(title, r_title) || strings.Contains(r_title, title)) {
+					if area == "全国" {
+						isRepeat = true
+						break L
+					}
+					if buyer == r_buyer || projectname == r_projectname || projectcode == r_projectcode || city == r_city {
+						isRepeat = true
+						break L
+					}
+				}
+			}
+		}
+	}
+	if isRepeat {
+		return true
+	}
+	// Not a duplicate: remember the record in today's bucket so later records
+	// are compared against it.
+	if !isRepeat {
+		data := map[string]interface{}{
+			"title":       title,
+			"buyer":       buyer,
+			"projectname": projectname,
+			"projectcode": projectcode,
+			"city":        city,
+			"publishtime": publishtime,
+		}
+		array := j.RedisFiveData[redisKey]
+		if array == nil {
+			array = &[]*map[string]interface{}{}
+			j.RedisFiveData[redisKey] = array
+		}
+		*array = append(*array, &data)
+	}
+	return false
+}
+
+// UpdateRedis writes every non-empty bucket of j.RedisFiveData back to the
+// "filter" redis and then resets the in-memory cache.
+//
+// Each bucket expires five days after the date embedded in its key
+// ("appid_department_toptype_yyyyMMdd_area"); buckets whose expiry has already
+// passed are not written. A key whose date part cannot be parsed is logged and
+// skipped. Previously the parse error was discarded, so such a bucket was
+// dropped without any trace.
+func (j *Job) UpdateRedis() {
+	if len(j.RedisFiveData) == 0 {
+		return
+	}
+	now := time.Now()
+	for k, v := range j.RedisFiveData {
+		if v == nil || len(*v) == 0 {
+			continue
+		}
+		// The date is the 4th "_"-separated field of the key. This breaks when
+		// appid, department or toptype themselves contain "_", hence the checks.
+		parts := strings.Split(k, "_")
+		if len(parts) < 4 {
+			log.Println("error: unexpected redis key format", k)
+			continue
+		}
+		createtime, err := time.ParseInLocation(qutil.Date_yyyyMMdd, parts[3], time.Local)
+		if err != nil {
+			log.Println("error: cannot parse date in redis key", k, err)
+			continue
+		}
+		timeout := createtime.AddDate(0, 0, 5).Sub(now).Seconds()
+		if timeout > 0 {
+			redis.Put("filter", k, v, qutil.IntAll(timeout))
+		}
+	}
+	j.RedisFiveData = map[string]*[]*map[string]interface{}{}
+}

+ 3 - 2
jyinfomatch/src/qyfw/loadDatas.go

@@ -26,6 +26,7 @@ var (
 	eachpool        = make(chan bool, 100)
 	searchWaitGroup = &sync.WaitGroup{}
 	MaxSearch       = 10000 //缓存中总共加载这么多条
+
 )
 
 func LoadDatasByEs(Config *map[string]interface{}) bool {
@@ -67,9 +68,9 @@ func LoadDatasByEs(Config *map[string]interface{}) bool {
 //加载数据到内存中
 func initBiddingCache(_id, lastid string, lastTime int64, startTime, endTime int64) (bool, []map[string]interface{}) {
 	c_query := fmt.Sprintf(FilterQuery, fmt.Sprintf(IDRange, _id, lastid, lastTime-7*86400))
+	//	testquery := `{"terms":{"_id":["596f21935d11e1c7455ddc78"]}}`
+	//	c_query = fmt.Sprintf(FilterQuery, testquery)
 	log.Println("es query:", c_query)
-	//testquery := `{"terms":{"_id":["596f21935d11e1c7455ddc7f"]}}`
-	//c_query = fmt.Sprintf(FilterQuery, testquery)
 	count := int(elastic.Count(DB, DB, c_query))
 	log.Println("本次共查到数据", count, "条")
 	var res []map[string]interface{}

+ 28 - 1
jyinfomatch/src/qyfw/script.go

@@ -52,6 +52,33 @@ func (s *Script) LoadScript(name, script_file string, isValidate bool) bool {
 }
 
 //取得变量
-func (s *Script) GetVar(key string) string {
+func (s *Script) GetStringVar(key string) string {
 	return s.L.GetGlobal(key).String()
 }
+
+// GetIntVar returns the Lua global named key as an int, or -1 when it is not a number.
+func (s *Script) GetIntVar(key string) int {
+	num, isNumber := s.L.GetGlobal(key).(lua.LNumber)
+	if !isNumber {
+		return -1
+	}
+	return int(num)
+}
+
+// GetBoolVar returns the Lua global named key as a bool, or false when it is not a boolean.
+func (s *Script) GetBoolVar(key string) bool {
+	flag, isBool := s.L.GetGlobal(key).(lua.LBool)
+	if !isBool {
+		return false
+	}
+	return bool(flag)
+}
+
+// GetFloatVar returns the Lua global named key as a float64, or -1 when it is not a number.
+func (s *Script) GetFloatVar(key string) float64 {
+	num, isNumber := s.L.GetGlobal(key).(lua.LNumber)
+	if !isNumber {
+		return -1
+	}
+	return float64(num)
+}

+ 13 - 0
jyinfomatch/src/util/util.go

@@ -2,6 +2,8 @@ package util
 
 import (
 	"fmt"
+	qutil "qfw/util"
+	"time"
 
 	"github.com/yuin/gopher-lua"
 )
@@ -50,3 +52,14 @@ func ArrayDuplicate(array []string) []string {
 	}
 	return temp
 }
+
+// GetLatelyFiveDay returns the dates of the most recent n days, today first,
+// each formatted with qutil.Date_yyyyMMdd.
+func GetLatelyFiveDay(n int) []string {
+	days := make([]string, n)
+	cursor := time.Now()
+	for idx := range days {
+		days[idx] = qutil.FormatDate(&cursor, qutil.Date_yyyyMMdd)
+		// step one calendar day back for the next slot
+		cursor = cursor.AddDate(0, 0, -1)
+	}
+	return days
+}