sitecount.go

package luatask

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"strconv"
	"sync"
	"time"

	"github.com/donnie4w/go-logger/logger"
	"github.com/imroc/req"
	"github.com/tealeg/xlsx"

	qu "qfw/util"
	"util"
)
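// SiteInfo holds the per-site daily collection statistics that are aggregated
// from the spider collections, exported to Excel, and saved to site_datacount.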
type SiteInfo struct {
	Site                  string `json:"site"`                  // site name
	Num                   int    `json:"averagenum"`            // average number of documents the site publishes per day
	Modifyuser            string `json:"modifyuser"`            // maintainer
	State                 string `json:"state"`                 // site status
	Domain                string `json:"domain"`                // domain
	Stype                 string `json:"stype"`                 // site type
	Platform              string `json:"platform"`              // owning platform
	Coverage              string `json:"coverage"`              // coverage rate
	ListAllNum            int    `json:"listallnum"`            // volume collected today (href-deduplicated)
	ListSuccessNum        int    `json:"listsuccessnum"`        // volume collected successfully today (href-deduplicated)
	PTimeSuccessNum       int    `json:"ptimesuccessnum"`       // volume published and collected successfully today (href-deduplicated)
	PTimeSuccessDbNum     int    `json:"ptimesuccessdbnum"`     // volume in data_bak published and collected successfully today (href-deduplicated)
	ThreeDaysAgoNum       int    `json:"threedaysagonum"`       // recount of the volume published three working days ago (some sites publish late, so the same-day count is inaccurate)
	BeforeThreeDaysAgoNum int    `json:"beforethreedaysagonum"` // historical count of the volume published three working days ago
	Comeintime            int64  `json:"comeintime"`            // time this record was created
}
// SiteInfoModel is the WeChat Work webhook payload for sending an uploaded file.
var SiteInfoModel = `{
"msgtype": "file",
"file": {
"media_id": "%s"
}
}`

// Daily totals for the Lua and Python pipelines; reset after SendLuaPythonAllNum reports them.
var LuaListDownloadAllNum int64
var LuaListDownloadSuccessAllNum int64
var LuaBiddingDownloadAllNum int64
var PythonListDownloadAllNum int64
var PythonListDownloadSuccessAllNum int64
var PythonBiddingDownloadAllNum int64

// LuaPythonNumModel is the WeChat Work webhook payload for a plain-text message.
var LuaPythonNumModel = `{
"msgtype": "text",
"text": {
"content": "%s"
}
}`

// MarkdownModel is the WeChat Work webhook payload for a markdown message.
var MarkdownModel = `{
"msgtype": "markdown",
"markdown": {
"content": "%s"
}
}`

// NumContentModel is the markdown body listing one platform's daily totals.
var NumContentModel = `
>平台:<font color=\"warning\">%s</font>
>列表页采集量:<font color=\"warning\">%d</font>
>列表页采集成功量:<font color=\"warning\">%d</font>\n
>Bidding成功量:<font color=\"warning\">%d</font>\n
`
//var AllHref map[string]string

// SendInfoToWxWork_SiteDataCount compiles the daily collection statistics for
// key sites and pushes the resulting Excel report to WeChat Work.
func SendInfoToWxWork_SiteDataCount() {
	defer qu.Catch()
	//AllHref = map[string]string{} // initialize
	// Part 1: gather the statistics
	// 1. Load the base information for each site
	//siteInfoMap := map[string]*SiteInfo{}
	//siteInfoMap_Back := map[string]*SiteInfo{}
	allSpiderMap := map[string]*SiteInfo{}
	list, _ := util.MgoE.Find("site_baseinfo", nil, nil, nil, false, -1, -1)
	for _, l := range *list {
		site := qu.ObjToString(l["site"])
		vByte, _ := json.Marshal(l)
		//siteInfo1 := &SiteInfo{}
		//siteInfo2 := &SiteInfo{}
		siteInfo3 := &SiteInfo{}
		//json.Unmarshal(vByte, siteInfo1)
		//json.Unmarshal(vByte, siteInfo2)
		json.Unmarshal(vByte, siteInfo3)
		//siteInfoMap[site] = siteInfo1
		//siteInfoMap_Back[site] = siteInfo2
		siteInfo3.Comeintime = time.Now().Unix()
		allSpiderMap[site] = siteInfo3
	}
	//stime := util.GetTime(-1)
	//etime := util.GetTime(0)
	//ptime := qu.FormatDateByInt64(&stime, qu.Date_Short_Layout)
	//qu.Debug(stime, etime, ptime)
	// 2. Count per-site volume in spider_highlistdata
	//GetHighListDataNum(stime, etime, ptime, siteInfoMap)
	// 3. Count per-site volume in spider_listdata
	//GetListDataNum(stime, etime, ptime, siteInfoMap)
	// 4. Count data_bak
	//GetDataBakNum(stime, etime, siteInfoMap)
	// 5. Count data_bak separately
	//GetDataBakNum_Back(stime, etime, siteInfoMap_Back)
	// 6. Aggregate the per-spidercode statistics
	GetAllSpidercodeNum(allSpiderMap)
	// 7. Build the Excel summary
	//GetSiteInfoExcel(siteInfoMap, siteInfoMap_Back, allSpiderMap)
	day := GetThreeDaysAgoNum(allSpiderMap)
	GetSiteInfoExcel(allSpiderMap, day)
}
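// GetAllSpidercodeNum aggregates today's per-spidercode counters from the
// luacodeinfo collection into the per-site SiteInfo entries. Python sites are
// keyed as "<site>(python)" to keep the two platforms apart.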
func GetAllSpidercodeNum(siteInfoMap map[string]*SiteInfo) {
	defer qu.Catch()
	logger.Info("counting luacodeinfo volume: start...")
	sess := util.MgoE.GetMgoConn()
	defer util.MgoE.DestoryMongoConn(sess)
	query := map[string]interface{}{
		"comeintime": map[string]interface{}{
			"$gte": util.GetTime(0),
		},
	}
	fields := map[string]interface{}{
		"repeatptimesuccessnum":    1,
		"repeatptimesuccessdbnum":  1,
		"repeatdownloadallnum":     1,
		"repeatdownloadsuccessnum": 1,
		"site":                     1,
		"platform":                 1,
	}
	lock := &sync.Mutex{}
	wg := &sync.WaitGroup{}
	ch := make(chan bool, 5)
	it := sess.DB(util.MgoE.DbName).C("luacodeinfo").Find(&query).Select(&fields).Iter()
	n := 0
	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
		wg.Add(1)
		ch <- true
		go func(tmp map[string]interface{}) {
			defer func() {
				<-ch
				wg.Done()
			}()
			site := qu.ObjToString(tmp["site"])
			platform := qu.ObjToString(tmp["platform"])
			repeatdownloadallnum := qu.IntAll(tmp["repeatdownloadallnum"])
			repeatdownloadsuccessnum := qu.IntAll(tmp["repeatdownloadsuccessnum"])
			repeatptimesuccessnum := qu.IntAll(tmp["repeatptimesuccessnum"])
			repeatptimesuccessdbnum := qu.IntAll(tmp["repeatptimesuccessdbnum"])
			if platform == "python" {
				site = site + "(python)"
			}
			lock.Lock()
			if info := siteInfoMap[site]; info != nil { // only key sites are counted
				info.ListAllNum += repeatdownloadallnum
				info.ListSuccessNum += repeatdownloadsuccessnum
				info.PTimeSuccessNum += repeatptimesuccessnum
				info.PTimeSuccessDbNum += repeatptimesuccessdbnum
			}
			lock.Unlock()
		}(tmp)
		if n%100 == 0 {
			logger.Debug(n)
		}
		tmp = map[string]interface{}{}
	}
	wg.Wait()
	logger.Debug("counting luacodeinfo volume: done...")
}
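// GetThreeDaysAgoNum recounts the volume published three working days ago and
// returns that date as a string. Weekends widen the look-back window: a run on
// a Tuesday walks back past Saturday and Sunday to the previous Thursday.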
func GetThreeDaysAgoNum(siteInfoMap map[string]*SiteInfo) (strStime string) {
	defer qu.Catch()
	// 1. Find the date three working days ago
	baseDay := 3
	for i := 1; i <= baseDay; i++ { // extend the window past any Saturday/Sunday it covers
		beforeDay := time.Now().AddDate(0, 0, -i)
		if weekDay := beforeDay.Weekday().String(); weekDay == "Saturday" || weekDay == "Sunday" {
			baseDay++
		}
	}
	logger.Info("baseday:", baseDay)
	stime := util.GetTime(-baseDay)                               // start timestamp (three working days ago)
	strStime = qu.FormatDateByInt64(&stime, qu.Date_Short_Layout) // start date
	logger.Info("querying day:", stime, strStime)
	// 2. Count the volumes
	GetSpiderHighListDataNum(stime, strStime, siteInfoMap) // spider_highlistdata
	GetSpiderListDataNum(stime, strStime, siteInfoMap)     // spider_listdata
	GetPythonDataNum(stime, strStime, siteInfoMap)         // python data_bak
	GetNumByLastTime(stime, baseDay, siteInfoMap)          // historical count recorded at the time
	return
}
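// GetSpiderHighListDataNum counts spider_highlistdata rows whose publishtime
// matches the target day, deduplicating hrefs per site via HrefRepeatMap.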
func GetSpiderHighListDataNum(stime int64, strStime string, siteInfoMap map[string]*SiteInfo) {
	defer qu.Catch()
	sess := util.MgoS.GetMgoConn()
	defer util.MgoS.DestoryMongoConn(sess)
	HrefRepeatMap := map[string]string{}
	lock := &sync.Mutex{}
	wg := &sync.WaitGroup{}
	ch := make(chan bool, 5)
	query := map[string]interface{}{
		"comeintime": map[string]interface{}{
			"$gte": stime,
		},
		"publishtime": map[string]interface{}{
			"$regex": strStime,
		},
	}
	fields := map[string]interface{}{
		"href": 1,
		"site": 1,
	}
	it := sess.DB(util.MgoS.DbName).C("spider_highlistdata").Find(&query).Select(&fields).Iter()
	n := 0
	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
		wg.Add(1)
		ch <- true
		go func(tmp map[string]interface{}) {
			defer func() {
				<-ch
				wg.Done()
			}()
			site := qu.ObjToString(tmp["site"])
			lock.Lock()
			if sInfo := siteInfoMap[site]; sInfo != nil { // only key sites are counted
				href := qu.ObjToString(tmp["href"])
				if tmpSite := HrefRepeatMap[href]; tmpSite != site { // dedupe hrefs within the same site
					sInfo.ThreeDaysAgoNum++
					HrefRepeatMap[href] = site
				}
			}
			lock.Unlock()
		}(tmp)
		if n%1000 == 0 {
			logger.Debug(n)
		}
		tmp = map[string]interface{}{}
	}
	wg.Wait()
	HrefRepeatMap = map[string]string{}
	logger.Debug("spider_highlistdata count for three working days ago: done...")
}
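// GetSpiderListDataNum counts spider_listdata rows whose publishtime matches
// the target day; rows from node 7000 are excluded.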
func GetSpiderListDataNum(stime int64, strStime string, siteInfoMap map[string]*SiteInfo) {
	defer qu.Catch()
	sess := util.MgoS.GetMgoConn()
	defer util.MgoS.DestoryMongoConn(sess)
	lock := &sync.Mutex{}
	wg := &sync.WaitGroup{}
	ch := make(chan bool, 5)
	query := map[string]interface{}{
		"comeintime": map[string]interface{}{
			"$gte": stime,
		},
		"publishtime": map[string]interface{}{
			"$regex": strStime,
		},
	}
	fields := map[string]interface{}{
		"site":  1,
		"event": 1,
	}
	it := sess.DB(util.MgoS.DbName).C("spider_listdata").Find(&query).Select(&fields).Iter()
	n := 0
	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
		wg.Add(1)
		ch <- true
		go func(tmp map[string]interface{}) {
			defer func() {
				<-ch
				wg.Done()
			}()
			if qu.IntAll(tmp["event"]) == 7000 { // skip node 7000
				return
			}
			site := qu.ObjToString(tmp["site"])
			lock.Lock()
			if sInfo := siteInfoMap[site]; sInfo != nil { // only key sites are counted
				sInfo.ThreeDaysAgoNum++
			}
			lock.Unlock()
		}(tmp)
		if n%1000 == 0 {
			logger.Debug(n)
		}
		tmp = map[string]interface{}{}
	}
	wg.Wait()
	logger.Debug("spider_listdata count for three working days ago: done...")
}
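// GetPythonDataNum counts the Python platform's data_bak rows whose
// publishtime matches the target day; sites are keyed as "<site>(python)".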
func GetPythonDataNum(stime int64, strStime string, siteInfoMap map[string]*SiteInfo) {
	defer qu.Catch()
	sess := util.MgoPy.GetMgoConn()
	defer util.MgoPy.DestoryMongoConn(sess)
	lock := &sync.Mutex{}
	wg := &sync.WaitGroup{}
	ch := make(chan bool, 5)
	query := map[string]interface{}{
		"comeintime": map[string]interface{}{
			"$gte": stime,
		},
		"publishtime": map[string]interface{}{
			"$regex": strStime,
		},
	}
	fields := map[string]interface{}{
		"site": 1,
	}
	qu.Debug(query)
	it := sess.DB(util.MgoPy.DbName).C("data_bak").Find(&query).Select(&fields).Iter()
	n := 0
	for tmp := make(map[string]interface{}); it.Next(tmp); n++ {
		wg.Add(1)
		ch <- true
		go func(tmp map[string]interface{}) {
			defer func() {
				<-ch
				wg.Done()
			}()
			site := qu.ObjToString(tmp["site"]) + "(python)"
			lock.Lock()
			if sInfo := siteInfoMap[site]; sInfo != nil { // only key sites are counted
				sInfo.ThreeDaysAgoNum++
			}
			lock.Unlock()
		}(tmp)
		if n%1000 == 0 {
			logger.Debug(n)
		}
		tmp = map[string]interface{}{}
	}
	wg.Wait()
	logger.Debug("python count for three working days ago: done...")
}
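// GetNumByLastTime loads, from site_datacount, the ptimesuccessnum that was
// recorded for the target day when it was first counted. A day's stats are
// saved the following day, so the lookup window starts one day after stime;
// a Friday's stats are saved on Monday, hence the three-day shift.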
func GetNumByLastTime(stime int64, baseDay int, siteInfoMap map[string]*SiteInfo) {
	defer qu.Catch()
	stimeWeekDay := time.Now().AddDate(0, 0, -baseDay).Weekday().String()
	start := stime + 86400
	end := stime + 86400*2
	if stimeWeekDay == "Friday" { // Friday's data is counted on the following Monday
		start = stime + 86400*3
		end = stime + 86400*4
	}
	query := map[string]interface{}{
		"comeintime": map[string]interface{}{
			"$gte": start,
			"$lt":  end,
		},
	}
	logger.Info("historical site count query:", query)
	list, _ := util.MgoEB.Find("site_datacount", query, nil, map[string]interface{}{"site": 1, "ptimesuccessnum": 1}, false, -1, -1)
	for _, l := range *list {
		site := qu.ObjToString(l["site"])
		pNum := qu.IntAll(l["ptimesuccessnum"])
		if sInfo := siteInfoMap[site]; sInfo != nil {
			sInfo.BeforeThreeDaysAgoNum = pNum
		}
	}
}
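// GetSiteInfoExcel renders one row per site into the res/sitecount.xlsx
// template, coloring the coverage cell by threshold (<60% red, <80% orange,
// <100% yellow, otherwise green), persists the rows to site_datacount, and
// hands the workbook to SendSiteInfoToWxWork.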
func GetSiteInfoExcel(allSpiderInfo map[string]*SiteInfo, day string) {
	defer qu.Catch()
	file, err := xlsx.OpenFile("res/sitecount.xlsx")
	if err != nil {
		qu.Debug("open file err:", err)
		return
	}
	sheet := file.Sheets[0]
	// the two date-dependent header cells only need to be set once, not per site
	sheet.Rows[0].Cells[6].SetValue(day + "新统计(publishtime)")
	sheet.Rows[0].Cells[7].SetValue(day + "历史统计(publishtime)")
	arr := []map[string]interface{}{}
	for site, info := range allSpiderInfo {
		style := xlsx.NewStyle()
		style.ApplyFill = true
		style.ApplyFont = true
		font := *xlsx.NewFont(10, "Verdana")
		style.Font = font
		row := sheet.AddRow()
		row.AddCell().SetValue(site)
		row.AddCell().SetValue(info.Num)
		row.AddCell().SetValue(info.ListAllNum)
		row.AddCell().SetValue(info.ListSuccessNum)
		row.AddCell().SetValue(info.PTimeSuccessNum)
		row.AddCell().SetValue(info.PTimeSuccessDbNum)
		row.AddCell().SetValue(info.ThreeDaysAgoNum)
		row.AddCell().SetValue(info.BeforeThreeDaysAgoNum)
		coverage := 0.0
		if info.Num > 0 { // guard against a zero daily average, which would yield Inf
			coverage = float64(info.PTimeSuccessNum) / float64(info.Num)
		}
		fill := &xlsx.Fill{
			PatternType: "solid",
		}
		if coverage < 0.6 {
			fill.FgColor = "00FF0000"
			fill.BgColor = "FF000000"
		} else if coverage >= 0.6 && coverage < 0.8 {
			fill.FgColor = "00FFCC00"
			fill.BgColor = "FF000000"
		} else if coverage >= 0.8 && coverage < 1.0 {
			fill.FgColor = "00FFFF00"
			fill.BgColor = "FF000000"
		} else {
			fill.FgColor = "0066FF33"
			fill.BgColor = "FF000000"
		}
		style.Fill = *fill
		value, _ := strconv.ParseFloat(fmt.Sprintf("%.2f", coverage*100), 64)
		result := fmt.Sprint(value) + "%"
		info.Coverage = result
		cell := row.AddCell()
		cell.SetValue(result)
		cell.SetStyle(style)
		row.AddCell().SetValue(info.Modifyuser)
		row.AddCell().SetValue(info.State)
		row.AddCell().SetValue(info.Domain)
		row.AddCell().SetValue(info.Stype)
		row.AddCell().SetValue(info.Platform)
		tmp := map[string]interface{}{}
		infoByte, err := json.Marshal(*info)
		if err == nil {
			if err = json.Unmarshal(infoByte, &tmp); err == nil {
				arr = append(arr, tmp)
			} else {
				logger.Info("json unmarshal:", err)
			}
		} else {
			logger.Info("json marshal:", err)
		}
	}
	util.MgoE.SaveBulk("site_datacount", arr...)
	arr = []map[string]interface{}{}
	//file.Save("res/tmp.xlsx")
	SendSiteInfoToWxWork(file)
}
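// SendSiteInfoToWxWork uploads the workbook to the WeChat Work webhook's
// temporary media store, then sends the returned media_id as a file message.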
func SendSiteInfoToWxWork(file *xlsx.File) {
	defer qu.Catch()
	//file, err := xlsx.OpenFile("res/sitecount.xlsx")
	//if err != nil {
	//	qu.Debug("open file err:", err)
	//	return
	//}
	mw := &util.MyWrite{
		Byte: &bytes.Buffer{},
	}
	file.Write(mw)
	bt := mw.Byte.Bytes()
	client := req.C()
	filename := Publishtime + "重点网站数据量统计.xlsx"
	resp, err := client.R().
		SetHeader("Content-Type", "multipart/form-data").
		//SetFiles(map[string]string{"file": string(resp4.Bytes())}).
		SetFileReader("file", filename, bytes.NewReader(bt)).
		Post("https://qyapi.weixin.qq.com/cgi-bin/webhook/upload_media?key=97850772-88d0-4544-a2c3-6201aeddff9e&type=file")
	if err != nil { // upload failed; resp would be unusable below
		qu.Debug("upload media err:", err)
		return
	}
	result := map[string]interface{}{}
	mediaId := ""
	if json.Unmarshal(resp.Bytes(), &result) == nil {
		mediaId = qu.ObjToString(result["media_id"])
	} else {
		qu.Debug("unmarshal result err")
	}
	msg := fmt.Sprintf(SiteInfoModel, mediaId)
	resp1, err := http.Post(
		"https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=97850772-88d0-4544-a2c3-6201aeddff9e",
		"application/json",
		bytes.NewBuffer([]byte(msg)),
	)
	if err != nil {
		fmt.Println("request error:", err)
		return
	}
	defer resp1.Body.Close()
}
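// SendLuaPythonAllNum reports the day's global Lua and Python totals as a
// markdown message, persists them to spider_luapythoncount, and resets the
// counters for the next day.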
func SendLuaPythonAllNum() {
	defer qu.Catch()
	luaContent := fmt.Sprintf(NumContentModel, "Lua", LuaListDownloadAllNum, LuaListDownloadSuccessAllNum, LuaBiddingDownloadAllNum)
	pythonContent := fmt.Sprintf(NumContentModel, "python", PythonListDownloadAllNum, PythonListDownloadSuccessAllNum, PythonBiddingDownloadAllNum)
	resultContent := fmt.Sprintf(MarkdownModel, Publishtime+",Lua、Python各维度采集量统计结果如下:\n"+luaContent+pythonContent)
	qu.Debug(resultContent)
	// persist the record
	util.MgoS.Save("spider_luapythoncount", map[string]interface{}{
		"lualistnum":           LuaListDownloadAllNum,
		"lualistsuccessnum":    LuaListDownloadSuccessAllNum,
		"luabiddingnum":        LuaBiddingDownloadAllNum,
		"pythonlistnum":        PythonListDownloadAllNum,
		"pythonlistsuccessnum": PythonListDownloadSuccessAllNum,
		"pythonbiddingnum":     PythonBiddingDownloadAllNum,
		"comeintime":           time.Now().Unix(),
		"date":                 Publishtime,
	})
	// reset the counters
	LuaListDownloadAllNum = 0
	LuaListDownloadSuccessAllNum = 0
	LuaBiddingDownloadAllNum = 0
	PythonListDownloadAllNum = 0
	PythonListDownloadSuccessAllNum = 0
	PythonBiddingDownloadAllNum = 0
	// push the summary
	resp, err := http.Post(
		"https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=97850772-88d0-4544-a2c3-6201aeddff9e",
		"application/json",
		bytes.NewBuffer([]byte(resultContent)),
	)
	if err != nil {
		fmt.Println("request error:", err)
		return
	}
	defer resp.Body.Close()
}
//func GetHighListDataNum(ctime, etime int64, ptime string, siteInfoMap map[string]*SiteInfo) {
//	defer qu.Catch()
//	sess := util.MgoS.GetMgoConn()
//	defer util.MgoS.DestoryMongoConn(sess)
//	query := map[string]interface{}{
//		"comeintime": map[string]interface{}{
//			"$gte": ctime,
//			"$lt":  etime,
//		},
//		//"publishtime": map[string]interface{}{
//		//	"$gte": ctime,
//		//	"$lt":  1654617600,
//		//},
//		"publishtime": map[string]interface{}{
//			"$regex": ptime,
//		},
//	}
//	fields := map[string]interface{}{
//		"state": 1,
//		"href":  1,
//		"site":  1,
//	}
//	lock := &sync.Mutex{}
//	wg := &sync.WaitGroup{}
//	ch := make(chan bool, 3)
//	qu.Debug("query:", query)
//	it := sess.DB(util.MgoS.DbName).C("spider_highlistdata").Find(&query).Select(&fields).Iter()
//	n := 0
//	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
//		wg.Add(1)
//		ch <- true
//		go func(tmp map[string]interface{}) {
//			defer func() {
//				<-ch
//				wg.Done()
//			}()
//			href := qu.ObjToString(tmp["href"])
//			state := qu.IntAll(tmp["state"])
//			site := qu.ObjToString(tmp["site"])
//			lock.Lock()
//			if info := siteInfoMap[site]; info != nil && AllHref[href] != site { // only key sites are counted
//				info.ListAllNum++
//				if state == 1 {
//					info.ListSuccessNum++
//				}
//				AllHref[href] = site
//			}
//			lock.Unlock()
//		}(tmp)
//		if n%1000 == 0 {
//			logger.Debug(n)
//		}
//		tmp = map[string]interface{}{}
//	}
//	wg.Wait()
//}
//
//func GetListDataNum(ctime, etime int64, ptime string, siteInfoMap map[string]*SiteInfo) {
//	defer qu.Catch()
//	sess := util.MgoS.GetMgoConn()
//	defer util.MgoS.DestoryMongoConn(sess)
//	query := map[string]interface{}{
//		"comeintime": map[string]interface{}{
//			"$gte": ctime,
//			"$lt":  etime,
//		},
//		"publishtime": map[string]interface{}{
//			"$regex": ptime,
//		},
//	}
//	fields := map[string]interface{}{
//		"state": 1,
//		"href":  1,
//		"site":  1,
//	}
//	lock := &sync.Mutex{}
//	wg := &sync.WaitGroup{}
//	ch := make(chan bool, 3)
//	it := sess.DB(util.MgoS.DbName).C("spider_listdata").Find(&query).Select(&fields).Iter()
//	n := 0
//	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
//		wg.Add(1)
//		ch <- true
//		go func(tmp map[string]interface{}) {
//			defer func() {
//				<-ch
//				wg.Done()
//			}()
//			href := qu.ObjToString(tmp["href"])
//			state := qu.IntAll(tmp["state"])
//			site := qu.ObjToString(tmp["site"])
//			lock.Lock()
//			if info := siteInfoMap[site]; info != nil && AllHref[href] != site { // only key sites are counted
//				info.ListAllNum++
//				if state == 1 {
//					info.ListSuccessNum++
//				}
//				AllHref[href] = site
//			}
//			lock.Unlock()
//		}(tmp)
//		if n%1000 == 0 {
//			logger.Debug(n)
//		}
//		tmp = map[string]interface{}{}
//	}
//	wg.Wait()
//}
//
//func GetDataBakNum(stime, etime int64, siteInfoMap map[string]*SiteInfo) {
//	defer qu.Catch()
//	// Sites whose list pages cannot be counted from spider_highlistdata or
//	// spider_listdata are counted from data_bak instead.
//	// Known issues:
//	// 1. List entries without a publish time are not counted.
//	// 2. If every site were re-counted from data_bak after the list count,
//	//    sites whose list/detail page links change would be counted twice.
//	for site, info := range siteInfoMap {
//		if info.ListAllNum == 0 {
//			query := map[string]interface{}{
//				"comeintime": map[string]interface{}{
//					"$gte": stime,
//					"$lt":  etime,
//				},
//				"publishtime": map[string]interface{}{
//					"$gte": stime,
//					"$lt":  etime,
//				},
//				"site": site,
//			}
//			count := util.MgoS.Count("data_bak", query)
//			info.ListAllNum = count
//			info.ListSuccessNum = count
//		}
//	}
//	AllHref = map[string]string{}
//}
//
//func GetDataBakNum_Back(stime, etime int64, siteInfoMap map[string]*SiteInfo) {
//	defer qu.Catch()
//	logger.Info("separate data_bak count: start...")
//	sess := util.MgoS.GetMgoConn()
//	defer util.MgoS.DestoryMongoConn(sess)
//	query := map[string]interface{}{
//		"comeintime": map[string]interface{}{
//			"$gte": stime,
//			"$lt":  etime,
//		},
//		"publishtime": map[string]interface{}{
//			"$gte": stime,
//			"$lt":  etime,
//		},
//	}
//	fields := map[string]interface{}{
//		"href": 1,
//		"site": 1,
//	}
//	lock := &sync.Mutex{}
//	wg := &sync.WaitGroup{}
//	ch := make(chan bool, 3)
//	it := sess.DB(util.MgoS.DbName).C("data_bak").Find(&query).Select(&fields).Iter()
//	n := 0
//	for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
//		wg.Add(1)
//		ch <- true
//		go func(tmp map[string]interface{}) {
//			defer func() {
//				<-ch
//				wg.Done()
//			}()
//			href := qu.ObjToString(tmp["href"])
//			site := qu.ObjToString(tmp["site"])
//			lock.Lock()
//			if info := siteInfoMap[site]; info != nil && AllHref[href] != site { // only key sites are counted
//				info.ListAllNum++
//				info.ListSuccessNum++
//				AllHref[href] = site
//			}
//			lock.Unlock()
//		}(tmp)
//		if n%1000 == 0 {
//			logger.Debug(n)
//		}
//		tmp = map[string]interface{}{}
//	}
//	wg.Wait()
//	logger.Info("separate data_bak count: done...")
//}
//
//func GetSiteInfoExcel(listInfo, backInfo, allSpiderInfo map[string]*SiteInfo) {
//	defer qu.Catch()
//	file, err := xlsx.OpenFile("res/sitecount.xlsx")
//	if err != nil {
//		qu.Debug("open file err:", err)
//		return
//	}
//	infoArr := []map[string]*SiteInfo{listInfo, backInfo, allSpiderInfo}
//	for i, sheet := range []*xlsx.Sheet{file.Sheets[0], file.Sheets[1], file.Sheets[2]} {
//		for site, info := range infoArr[i] {
//			row := sheet.AddRow()
//			row.AddCell().SetValue(site)
//			row.AddCell().SetValue(info.Num)
//			row.AddCell().SetValue(info.ListAllNum)
//			row.AddCell().SetValue(info.ListSuccessNum)
//			if i == 2 {
//				row.AddCell().SetValue(info.PTimeSuccessNum)
//			}
//			row.AddCell().SetValue(0)
//			row.AddCell().SetValue(info.Modifyuser)
//			row.AddCell().SetValue(info.State)
//			row.AddCell().SetValue(info.Domain)
//			row.AddCell().SetValue(info.Stype)
//			row.AddCell().SetValue(info.Platform)
//		}
//	}
//	//SendSiteInfoToWxWork(file)
//	file.Save("res/tmp.xlsx")
//}