Browse Source

判重修改,监听站点修改-

apple 4 years ago
parent
commit
93fdc0aa9c

+ 11 - 11
data_monitoring/listen_data/src/config.json

@@ -1,31 +1,31 @@
 {
   "save_mgodb": {
-    "addr": "192.168.3.207:27092",
-    "db": "zhengkun",
-    "coll": "site_unique_new",
+    "addr": "172.17.4.87:27080",
+    "db": "editor",
+    "coll": "monitor_site",
     "pool": 5
   },
   "site_mgodb": {
-    "addr": "192.168.3.207:27092",
-    "db": "zhengkun",
-    "coll": "site_unique_new",
+    "addr": "172.17.4.87:27080",
+    "db": "spider",
+    "coll": "data_bak",
     "pool": 5
   },
   "python_mgodb": {
-    "addr": "192.168.3.207:27092",
-    "db": "zhengkun",
-    "coll": "site_unique_new",
+    "addr": "172.17.4.187:27082,172.17.145.163:27083",
+    "db": "qfw",
+    "coll": "bidding",
     "pool": 5
   },
   "site_unique_name":"site_unique_new",
   "smtpMail": {
     "from": "zhengkun@topnet.net.cn",
-    "to": "zhengkun@topnet.net.cn,fengweiqiang@topnet.net.cn",
+    "to": "zhengkun@topnet.net.cn,chenjiakang@topnet.net.cn",
     "cc": "zhengkun@topnet.net.cn",
     "smtpHost": "smtp.qq.com",
     "smtpPort": "465",
     "user":     "920032221@qq.com",
-    "pwd":      "xomkphsjsamybdbj"
+    "pwd":      "lktqxssmdkebbcbj"
   },
   "xlsx_name" : "site_data.xlsx"
 }

+ 232 - 18
data_monitoring/listen_data/src/main.go

@@ -1,15 +1,22 @@
 package main
 
 import (
+	"fmt"
+	"github.com/cron"
+	"github.com/tealeg/xlsx"
 	"github.com/xuri/excelize"
+	"go.mongodb.org/mongo-driver/bson/primitive"
 	"log"
+	"os"
 	qu "qfw/util"
+	"time"
 )
 
 var (
 	sysconfig    							map[string]interface{} //配置文件
 	save_mgo,site_mgo,python_mgo        	*MongodbSim            //mongodb操作对象
 	save_c_name,site_c_name,python_c_name,xlsx_name,site_unique_name	string
+	prepareData								[]map[string]interface{}
 )
 
 func initMgo()  {
@@ -23,24 +30,28 @@ func initMgo()  {
 	}
 	save_mgo.InitPool()
 
-	//siteconf := sysconfig["site_mgodb"].(map[string]interface{})
-	//site_c_name = qu.ObjToString(siteconf["coll"])
-	//site_mgo = &MongodbSim{
-	//	MongodbAddr: siteconf["addr"].(string),
-	//	DbName:      siteconf["db"].(string),
-	//	Size:        qu.IntAllDef(siteconf["pool"], 5),
-	//}
-	//site_mgo.InitPool()
-	//
-	//
-	//pconf := sysconfig["python_mgodb"].(map[string]interface{})
-	//python_c_name = qu.ObjToString(pconf["coll"])
-	//python_mgo = &MongodbSim{
-	//	MongodbAddr: pconf["addr"].(string),
-	//	DbName:      pconf["db"].(string),
-	//	Size:        qu.IntAllDef(pconf["pool"], 5),
-	//}
-	//python_mgo.InitPool()
+	siteconf := sysconfig["site_mgodb"].(map[string]interface{})
+	site_c_name = qu.ObjToString(siteconf["coll"])
+	site_mgo = &MongodbSim{
+		MongodbAddr: siteconf["addr"].(string),
+		DbName:      siteconf["db"].(string),
+		Size:        qu.IntAllDef(siteconf["pool"], 5),
+	}
+	site_mgo.InitPool()
+
+
+	pconf := sysconfig["python_mgodb"].(map[string]interface{})
+	python_c_name = qu.ObjToString(pconf["coll"])
+	python_mgo = &MongodbSim{
+		MongodbAddr: pconf["addr"].(string),
+		DbName:      pconf["db"].(string),
+		Size:        qu.IntAllDef(pconf["pool"], 5),
+		//Password:	 "zk@123123",
+		//UserName:	 "zhengkun",
+		Password:	 "datazy@read",
+		UserName:	 "dataZY",
+	}
+	python_mgo.InitPool()
 
 
 	site_unique_name = qu.ObjToString(sysconfig["site_unique_name"])
@@ -57,8 +68,211 @@ func init() {
 	initMgo()
 }
 
+
 func main()  {
 
+	// Register the daily report job. NOTE(review): the spec has six fields, which
+	// presumably means the vendored cron package takes a leading seconds field,
+	// i.e. 08:50:00 every day — confirm against that package.
+	c := cron.New()
+	c.AddFunc("0 50 8 ? * *", func() { dealWithSiteData() })
+	c.Start()
+	// Run once right away so a fresh start produces a report without waiting
+	// for the next scheduled slot.
+	log.Println("测试立即执行")
+	dealWithSiteData()
+	// Keep the process alive: without this main returns as soon as the
+	// immediate run finishes, the program exits, and the scheduled entry
+	// registered above never gets a chance to fire.
+	select {}
+}
+
+// dealWithSiteData builds the site ingestion report and mails it.
+//
+// Steps, in order:
+//  1. reload the template rows into prepareData (prepareXlsxSiteData);
+//  2. compute the window [local midnight 7 days ago, local midnight today);
+//  3. count documents per day/site in the site collection and per
+//     day/site/spidercode in the python collection, skipping days for which
+//     GetOneWeekDay returns 6 or 7;
+//  4. write one sheet per prepareData entry to xlsx_name;
+//  5. send the file with sendErrMailSmtp.
+//
+// It takes no arguments and returns nothing; failures are only logged.
+func dealWithSiteData()  {
+
+	prepareXlsxSiteData()// load the template rows into prepareData
+
+	if prepareData==nil || len(prepareData)==0{
+		log.Println("异常:无数据")
+		return
+	}
+
+	now:=time.Now()
+	durdays:=7 // window length: 7 days (original example: run on Saturday, May 15)
+	start:= time.Date(now.Year(), now.Month(), now.Day()-durdays, 0, 0, 0, 0, time.Local).Unix()
+	end := time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.Local).Unix()
+
+	log.Println(start,end)
+	q := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$gte":  start,
+			"$lt": end,
+		},
+	}
+	// Build the per-day buckets: one entry per non-weekend day in the window, keyed by "2006-01-02".
+	sitedata ,pythondata:= make(map[string]map[string]int,0),make(map[string]map[string]map[string]interface{},0)
+	siteTimeArr := []string{}
+	for i:=0;i<durdays ; i++ {
+		t := int64(i*86400)+start
+		time_key :=TimeStampToString(t)
+		time_day := GetOneWeekDay(time_key)
+		if time_day==7||time_day==6 {}else {
+			sitedata[time_key] = map[string]int{}
+			pythondata[time_key] = map[string]map[string]interface{}{}
+			siteTimeArr = append(siteTimeArr,time_key)
+		}
+	}
+	sess_site := site_mgo.GetMgoConn()
+	defer site_mgo.DestoryMongoConn(sess_site)
+	log.Println("data_bak 查询条件:",q)
+	it_site := sess_site.DB(site_mgo.DbName).C(site_c_name).Find(&q).Sort("comeintime").Select(map[string]interface{}{
+		"comeintime":        1,
+		"site":1,
+	}).Iter()
+	total:= 0
+	for tmp := make(map[string]interface{}); it_site.Next(&tmp); total++ {
+		if total%10000 == 0 {
+			log.Println("current:", total)
+		}
+
+		site:=qu.ObjToString(tmp["site"])
+		comeintime:=qu.Int64All(tmp["comeintime"])
+		time_key :=TimeStampToString(comeintime)
+		time_day := GetOneWeekDay(time_key)
+		if time_day==6 ||time_day==7 {}else {
+			// NOTE(review): timedata is nil when time_key is not one of the keys built above; the writes below would then panic.
+			timedata := sitedata[time_key]
+			if qu.Int64All(timedata[site])==0 {
+				timedata[site] =1
+			}else {
+				num :=timedata[site]
+				timedata[site] = num+1
+			}
+		}
+		tmp = make(map[string]interface{})
+	}
+
+	log.Println("is site over :",total)
+
+
+
+	sess_python := python_mgo.GetMgoConn()
+	defer python_mgo.DestoryMongoConn(sess_python)
+	log.Println("bidding 查询条件:",q)
+	it_python := sess_python.DB(python_mgo.DbName).C(python_c_name).Find(&q).Sort("comeintime").Select(map[string]interface{}{
+		"comeintime":        1,
+		"site":1,
+		"spidercode":1,
+	}).Iter()
+	total= 0
+	for tmp := make(map[string]interface{}); it_python.Next(&tmp); total++ {
+		if total%10000 == 0 {
+			log.Println("current:", total)
+		}
+		site:=qu.ObjToString(tmp["site"])
+		spidercode:=qu.ObjToString(tmp["spidercode"])
+		comeintime:=qu.Int64All(tmp["comeintime"])
+
+		time_key :=TimeStampToString(comeintime)
+		time_day := GetOneWeekDay(time_key)
+		if time_day==6 ||time_day==7 {}else {
+			// NOTE(review): same nil-map caveat as in the site loop if time_key has no bucket.
+			timedata := pythondata[time_key]
+			if timedata[site]==nil {
+				// First document seen for this site on this day: start its spidercode counter at 1.
+				timedata[site] = map[string]interface{}{
+					spidercode:1,
+				}
+			}else {
+				dict :=timedata[site]
+				num := qu.IntAll(dict[spidercode])
+				timedata[site][spidercode] = num+1
+			}
+		}
+
+		tmp = make(map[string]interface{})
+	}
+
+	log.Println("is python over :",total)
+
+	// Drop any previous report file, then build a new workbook; the os.Remove error is ignored.
+	os.Remove(xlsx_name)
+	f :=xlsx.NewFile()
+
+	// One sheet per template entry; the sheet is named after the entry's "type" field.
+	for _,tmp:=range prepareData {
+		type_name := qu.ObjToString(tmp["type"])
+		data, _ := tmp["data"].(primitive.A)
+		dataArr := qu.ObjArrToMapArr(data)
+		sheet, _ := f.AddSheet(type_name) // NOTE(review): the AddSheet error is discarded here
+		row := sheet.AddRow()
+		row.AddCell().Value = "国家/省份"
+		row.AddCell().Value = "站点名称"
+		if type_name == "python" {
+			row.AddCell().Value = "爬虫名称"
+		}
+		row.AddCell().Value = "负责人"
+		for _, timekey := range siteTimeArr {
+			row.AddCell().SetString(fmt.Sprintf("%s入库量", timekey))
+		}
+		row.AddCell().Value = "前五天总量"
+
+		for _, dict := range dataArr {
+			row = sheet.AddRow()
+			row.AddCell().SetString(qu.ObjToString(dict["area"]))
+			row.AddCell().SetString(qu.ObjToString(dict["site"]))
+			if type_name == "python" {
+				row.AddCell().SetString(qu.ObjToString(dict["spidercode"]))
+			}
+			row.AddCell().SetString(qu.ObjToString(dict["person"]))
+			total_num := 0
+			if type_name == "python" {
+				// "python" rows are looked up by site and spidercode.
+				for _,timekey:=range siteTimeArr{
+					site_key := qu.ObjToString(dict["site"])
+					spider_key := qu.ObjToString(dict["spidercode"])
+					num := qu.IntAll(pythondata[timekey][site_key][spider_key])
+					total_num = total_num+num
+					row.AddCell().SetString(fmt.Sprintf("%d",num))
+				}
+				row.AddCell().SetString(fmt.Sprintf("%d",total_num))
+			}else {
+				// All other sheet types are looked up by site only.
+				for _,timekey:=range siteTimeArr{
+					key := qu.ObjToString(dict["site"])
+					num := qu.IntAll(sitedata[timekey][key])
+					total_num = total_num+num
+					row.AddCell().SetString(fmt.Sprintf("%d",num))
+				}
+				row.AddCell().SetString(fmt.Sprintf("%d",total_num))
+			}
+
+
+		}
+
+	}
+
+	err := f.Save(xlsx_name)
+	if err != nil {
+		log.Println("保存xlsx失败:", err)
+		return
+	}else {
+		log.Println("保存xlsx成功:", err)
+	}
+
+
+	// Whether to persist a log record of this run is still undecided.
+
+
+	// NOTE(review): fixed 5-second pause before mailing; the reason is not visible in this file.
+	time.Sleep(5*time.Second)
+
+	// Send the report by mail.
+	sendErrMailSmtp("主要站点最近五个工作日相关统计","附件")
+
+	log.Println("结束......")
+
+	//
+
+}
+
+// prepareXlsxSiteData reloads the template site data: every document of the
+// site_unique_name collection (read through save_mgo) is appended to the
+// package-level prepareData slice with its "_id" field removed.
+func prepareXlsxSiteData()  {
+	prepareData = []map[string]interface{}{}
+
+	conn := save_mgo.GetMgoConn()
+	defer save_mgo.DestoryMongoConn(conn)
+
+	query := map[string]interface{}{}
+	iter := conn.DB(save_mgo.DbName).C(site_unique_name).Find(&query).Iter()
+
+	doc := make(map[string]interface{})
+	for iter.Next(&doc) {
+		delete(doc, "_id")
+		prepareData = append(prepareData, doc)
+		// Use a fresh map for the next document so stored rows are never overwritten.
+		doc = make(map[string]interface{})
+	}
+	log.Println("准备完毕... ...")
 }
 
 

+ 31 - 0
data_monitoring/listen_data/src/mark

@@ -0,0 +1,31 @@
+{
+  "save_mgodb": {
+    "addr": "192.168.3.207:27092",
+    "db": "zhengkun",
+    "coll": "site_unique_new",
+    "pool": 5
+  },
+  "site_mgodb": {
+    "addr": "192.168.3.207:27092",
+    "db": "zhengkun",
+    "coll": "data_bak_copy",
+    "pool": 5
+  },
+  "python_mgodb": {
+    "addr": "192.168.3.207:27092",
+    "db": "zhengkun",
+    "coll": "bidding_copy",
+    "pool": 5
+  },
+  "site_unique_name":"site_unique_new",
+  "smtpMail": {
+    "from": "zhengkun@topnet.net.cn",
+    "to": "zhaoyujian@topnet.net.cn,fengweiqiang@topnet.net.cn",
+    "cc": "zhengkun@topnet.net.cn",
+    "smtpHost": "smtp.qq.com",
+    "smtpPort": "465",
+    "user":     "920032221@qq.com",
+    "pwd":      "lktqxssmdkebbcbj"
+  },
+  "xlsx_name" : "site_data.xlsx"
+}

+ 61 - 0
data_monitoring/listen_data/src/sendmail.go

@@ -0,0 +1,61 @@
+package main
+
+import (
+	"fmt"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"qfw/util/mail"
+	"os"
+	qu "qfw/util"
+)
+var tomail string
+var api string
+var from,to,cc, smtpHost,user,pwd string
+var smtpPort int
+// sendErrMailApi sends a notification through the HTTP mail gateway configured
+// under sysconfig["jkmail"] (keys "to" and "api"). It is one of two
+// interchangeable delivery paths; sendErrMailSmtp is the other.
+//
+// title and body are passed to the gateway as the "title" and "body" query
+// parameters, next to "to". All three are URL-encoded, so spaces, '&', '#'
+// and non-ASCII text reach the gateway intact.
+// The outcome is only logged; nothing is returned.
+func sendErrMailApi(title,body string)  {
+	jkmail, _ := sysconfig["jkmail"].(map[string]interface{})
+	if jkmail != nil {
+		tomail, _ = jkmail["to"].(string)
+		api, _ = jkmail["api"].(string)
+	}
+	log.Println(tomail,api)
+
+	req, err := http.NewRequest("GET", api, nil)
+	if err != nil {
+		log.Println("邮件发送失败:", err)
+		return
+	}
+	// Build the query through url.Values (reached via req.URL) instead of
+	// string formatting, so every value is escaped correctly.
+	query := req.URL.Query()
+	query.Set("to", tomail)
+	query.Set("title", title)
+	query.Set("body", body)
+	req.URL.RawQuery = query.Encode()
+
+	res, err := http.DefaultClient.Do(req)
+	if err != nil {
+		log.Println("邮件发送失败:", err)
+		return
+	}
+	defer res.Body.Close()
+
+	read, err := ioutil.ReadAll(res.Body)
+	// A transport-level success is not enough: a non-2xx status or an
+	// unreadable body means the gateway did not accept the mail.
+	if err != nil || res.StatusCode < 200 || res.StatusCode > 299 {
+		log.Println("邮件发送失败:", res.Status, string(read), err)
+		return
+	}
+	log.Println("邮件发送成功:", string(read), err)
+}
+
+// sendErrMailSmtp mails the report file xlsx_name as an attachment over SMTP.
+//
+// Sender, recipients and SMTP credentials are read from sysconfig["smtpMail"]
+// into the package-level from/to/cc/smtpHost/smtpPort/user/pwd variables.
+// title is the mail subject and body the mail text.
+// If the attachment cannot be opened or read, the error is printed and no
+// mail is sent. The result of the send call is printed to stdout.
+func sendErrMailSmtp(title,body string) {
+
+	smtpMail, _ := sysconfig["smtpMail"].(map[string]interface{})
+	if smtpMail != nil {
+		from, _ = smtpMail["from"].(string)
+		to, _ = smtpMail["to"].(string)
+		cc, _ = smtpMail["cc"].(string)
+		smtpHost, _ = smtpMail["smtpHost"].(string)
+		smtpPort= qu.IntAll(smtpMail["smtpPort"])
+		user, _ = smtpMail["user"].(string)
+		pwd, _ = smtpMail["pwd"].(string)
+	}
+
+	f, err := os.Open(xlsx_name)
+	if err != nil {
+		fmt.Println("err:",err)
+		return
+	}
+	// Release the file handle once the attachment has been handed over.
+	defer f.Close()
+
+	b, err := ioutil.ReadAll(f)
+	if err != nil {
+		fmt.Println("err:",err)
+		return
+	}
+
+	// NOTE(review): cc is passed twice; presumably the second slot is bcc — confirm against mail.GSendMail_Bq.
+	ok := mail.GSendMail_Bq(from, to, cc, cc, title, body, f.Name(), b, &mail.GmailAuth{
+		SmtpHost: smtpHost,
+		SmtpPort: smtpPort,
+		User:     user,
+		Pwd:      pwd,
+	})
+	fmt.Println(ok)
+}

+ 120 - 0
data_monitoring/listen_data/src/weekday.go

@@ -0,0 +1,120 @@
+package main
+
+import (
+	"log"
+	"time"
+)
+
+// WeekDayMap maps the English weekday names produced by time.Weekday.String()
+// to a number, with Monday as 1 and Sunday as 7.
+var WeekDayMap = map[string]int64{
+	"Monday":    1,
+	"Tuesday":   2,
+	"Wednesday": 3,
+	"Thursday":  4,
+	"Friday":    5,
+	"Saturday":  6,
+	"Sunday":    7,
+}
+
+// GetOneWeekDay returns the weekday number of startime, a date formatted as
+// "2006-01-02": Monday is 1 and Sunday is 7 (see WeekDayMap).
+//
+// A date that cannot be parsed is logged and reported as 0. Previously the
+// parse error was dropped, so such input came back as 1 because the zero
+// time.Time falls on a Monday.
+func GetOneWeekDay(startime string) (int64) {
+	day, err := time.Parse("2006-01-02", startime)
+	if err != nil {
+		log.Println("GetOneWeekDay出现异常:", err)
+		return 0
+	}
+	return WeekDayMap[day.Weekday().String()]
+}
+
+
+// GetWeekDay returns the weekday numbers (Monday=1 ... Sunday=7, per
+// WeekDayMap) of two dates formatted as "2006-01-02", in argument order.
+func GetWeekDay(startime, endtim string) (int64, int64) {
+	weekdayOf := func(date string) int64 {
+		// The parse error is deliberately dropped, as before: an unparseable
+		// date yields the zero time.Time and is reported with its weekday.
+		parsed, _ := time.Parse("2006-01-02", date)
+		return WeekDayMap[parsed.Weekday().String()]
+	}
+	first := weekdayOf(startime)
+	second := weekdayOf(endtim)
+	return first, second
+}
+
+
+// StringToTimeStamp parses strTime as a "2006-01-02" date in the local time
+// zone and returns it as Unix seconds. A parse failure is logged and the
+// Unix value of the zero time.Time is returned.
+func StringToTimeStamp(strTime string) int64 {
+	const layout = "2006-01-02"
+	parsed, err := time.ParseInLocation(layout, strTime, time.Local)
+	if err != nil {
+		log.Println("StringToTimeStamp出现异常:", err)
+	}
+	return parsed.Unix()
+}
+
+// TimeStampToString formats the Unix-seconds value timeStp as a
+// "2006-01-02" date string in the local time zone.
+func TimeStampToString(timeStp int64) string {
+	return time.Unix(timeStp, 0).Format("2006-01-02")
+}
+
+
+// ChangeToWeek expands the date range [startime, endtim] (both "2006-01-02")
+// into a list of weeks. Each entry is a map with the keys "mon" and "sun"
+// holding that week's Monday and Sunday as "2006-01-02" strings.
+//
+// The first entry is always the week containing startime. Depending on how
+// many whole weeks separate the two dates and on how their weekday numbers
+// compare, the weeks in between and the week containing endtim are appended.
+func ChangeToWeek(startime, endtim string) []map[string]string {
+	const (
+		daySecs  int64 = 86400
+		weekSecs int64 = 604800
+	)
+
+	startWd, endWd := GetWeekDay(startime, endtim)
+	// Convert both dates to Unix seconds.
+	startTs := StringToTimeStamp(startime)
+	endTs := StringToTimeStamp(endtim)
+	log.Println("start_stamp==", startTs, "end_stamp==", endTs)
+
+	// weekAround returns the Monday/Sunday pair of the week that contains
+	// the timestamp ts, given that ts falls on weekday number wd.
+	weekAround := func(ts, wd int64) map[string]string {
+		return map[string]string{
+			"mon": TimeStampToString(ts - (wd-1)*daySecs),
+			"sun": TimeStampToString(ts + (7-wd)*daySecs),
+		}
+	}
+
+	span := endTs - startTs
+	wdDelta := endWd - startWd
+
+	weeks := make([]map[string]string, 0)
+	weeks = append(weeks, weekAround(startTs, startWd))
+
+	// At most one whole week apart and the end weekday is not earlier than
+	// the start weekday.
+	if span/weekSecs <= 1 && wdDelta >= 0 {
+		if span < weekSecs && wdDelta > 0 {
+			// Both dates are in the same week.
+			return weeks
+		}
+		return append(weeks, weekAround(endTs, endWd))
+	}
+
+	// Add one entry per whole week after the start week.
+	weekN := span / weekSecs
+	for i := 1; i <= int(weekN); i++ {
+		weeks = append(weeks, weekAround(startTs+int64(i)*weekSecs, startWd))
+	}
+	if wdDelta >= 0 {
+		return weeks
+	}
+	// The end weekday is earlier than the start weekday, so the week that
+	// contains endtim has not been added yet.
+	return append(weeks, weekAround(endTs, endWd))
+
+}

+ 4 - 0
udpfilterdup/src/dataMethod.go

@@ -74,6 +74,10 @@ func againRepeat(v *Info, info *Info) bool {
 	if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
 		return true
 	}
+	if v.title != info.title && v.title != "" && info.title != ""{
+		return true
+	}
+
 
 	return false
 }

+ 1 - 1
udpfilterdup/src/updateMethod.go

@@ -45,7 +45,7 @@ func (update *updateInfo) updateData() {
 				tmpArr = make([][]map[string]interface{}, update.saveSize)
 				tmpIndex = 0
 			}
-		case <-time.After(10 * time.Second)://无反应时每x秒检测一次
+		case <-time.After(5 * time.Second)://无反应时每x秒检测一次
 			if tmpIndex > 0 {
 				sp <- true
 				go func(dataArr [][]map[string]interface{}) {