package spider

import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	mu "mfw/util"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	qu "qfw/util"
	mgu "qfw/util/mongodbutil"
	"regexp"
	util "spiderutil"
	"strings"
	"sync"
	"time"

	"github.com/donnie4w/go-logger/logger"
	"github.com/yuin/gopher-lua"
	. "gopkg.in/mgo.v2/bson"
)
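// Package-level spider registries: Allspiders holds the list-page spider pool,
// Allspiders2 holds the detail-page spider pool (when list and detail collection
// are split), and LoopListPath is the scheduling table keyed by spider code.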
var SpiderHeart sync.Map // spider heartbeats
var Allspiders sync.Map
var Allspiders2 sync.Map
var LoopListPath sync.Map

//var ChanDels = map[int]string{}
//var lock sync.Mutex
var CC chan *lua.LState
var CC2 chan *lua.LState
var Chansize int
var regcode, _ = regexp.Compile(`="(.*)"`)
var InitCount int
var InitAllLuaOver = make(chan bool, 1) // signals that all scripts have finished loading
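// InitSpider loads all spider scripts from the database and from local files,
// registers them in LoopListPath, and starts collection in high-performance
// mode or queue mode depending on util.Config.Working.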
func InitSpider() {
	scriptMap := getSpiderScriptDB("all") // load spiders, initialize templates
	scriptMapFile := getSpiderScriptFile(false)
	for code, v := range scriptMap {
		LoopListPath.Store(code, v)
		InitCount++
	}
	for code, v := range scriptMapFile {
		LoopListPath.Store(code, v)
		InitCount++
	}
	if util.Config.Working == 0 {
		NoQueueScript() // high-performance mode
	} else {
		if util.Config.Modal == 0 { // original mode
			QueueUpScriptList()
		} else { // collect list pages and detail (third-level) pages separately
			go QueueUpScriptList()   // energy-saving mode: list pages
			go QueueUpScriptDetail() // energy-saving mode: detail pages
		}
	}
}
// NoQueueScript starts every spider in high-performance mode.
func NoQueueScript() {
	list := *mgu.Find("spider_ldtime", "spider", "spider", ``, nil, `{"code":1,"uplimit":1,"lowlimit":1}`, false, -1, -1)
	LoopListPath.Range(func(key, temp interface{}) bool {
		if info, ok := temp.(map[string]string); ok {
			code := info["code"]
			script := info["script"]
			sp, errstr := NewSpider(code, script)
			if errstr == "" && sp != nil && sp.Code != "nil" { // script loaded successfully
				//sp.Index = qu.IntAll(key)
				//sp2.Index = qu.IntAll(key)
				if info["createuser"] != "" {
					sp.UserName = info["createuser"]
				}
				if info["createuseremail"] != "" {
					sp.UserEmail = info["createuseremail"]
				}
				sp.MUserName = info["modifyuser"]
				sp.MUserEmail = info["modifyemail"]
				Allspiders.Store(sp.Code, sp)
				for _, tmp := range list {
					if qu.ObjToString(tmp["code"]) == sp.Code {
						sp.UpperLimit = qu.IntAll(tmp["uplimit"])
						//sp2.UpperLimit = qu.IntAll(tmp["uplimit"])
						sp.LowerLimit = qu.IntAll(tmp["lowlimit"])
						//sp2.LowerLimit = qu.IntAll(tmp["lowlimit"])
						break
					}
				}
				if util.Config.Modal == 1 { // list pages and detail pages are collected separately
					sp2, _ := NewSpider(code, script)
					sp2.UserName = sp.UserName
					sp2.UserEmail = sp.UserEmail
					sp2.MUserName = sp.MUserName
					sp2.MUserEmail = sp.MUserEmail
					Allspiders2.Store(sp.Code, sp2)
				}
				sp.StartJob()
				//util.TimeSleepFunc(10*time.Millisecond, TimeSleepChan)
			} else {
				logger.Info(code, "脚本加载失败,请检查!")
				nowT := time.Now().Unix()
				username := "异常"
				if sp != nil {
					username = sp.MUserName
				}
				mgu.Update("spider_loadfail", "spider", "spider",
					map[string]interface{}{
						"code": code,
						"modifytime": map[string]interface{}{
							"$gte": nowT - 12*3600,
							"$lte": nowT + 12*3600,
						},
					},
					map[string]interface{}{
						"$set": map[string]interface{}{
							"code":       code,
							"type":       "初始化",
							"script":     script,
							"updatetime": nowT,
							"modifyuser": username,
							"event":      util.Config.Uploadevent,
							"err":        errstr,
						},
					}, true, false)
			}
			time.Sleep(100 * time.Millisecond)
		}
		return true
	})
	InitAllLuaOver <- true // spider initialization finished
	logger.Info("高性能模式:LUA加载完成")
	numSpider := 0
	Allspiders.Range(func(key, value interface{}) bool {
		numSpider++
		return true
	})
	logger.Info("总共加载脚本数:", numSpider)
}
// QueueUpScriptList downloads list-page data in queue (energy-saving) mode.
func QueueUpScriptList() {
	logger.Info("节能模式列表页")
	CC = make(chan *lua.LState, util.Config.Chansize)
	for i := 0; i < util.Config.Chansize; i++ { // two spider pools (Allspiders, Allspiders2) are initialized, so threads are doubled
		CC <- lua.NewState(lua.Options{
			RegistrySize:        256 * 20,
			CallStackSize:       256,
			IncludeGoStackTrace: false,
		})
	}
	for {
		listLen, listNoLen, DelLen := 0, 0, 0
		logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环", "初始化脚本数量:", InitCount)
		LoopListPath.Range(func(key, temp interface{}) bool {
			if info, ok := temp.(map[string]string); ok {
				code := info["code"]
				old_is_running := false
				tmp, b := Allspiders.Load(code)
				if b {
					if sp_old, ok := tmp.(*Spider); ok {
						if !sp_old.Stop {
							old_is_running = true
						}
					}
				}
				logger.Info("Code:", code, "Is Downloading List:", old_is_running)
				if !old_is_running { // only start the spider if it is not already running
					script := info["script"]
					sp, errstr := NewSpider_New(code, script, false)
					//logger.Info("初始化脚本是否成功:", sp != nil, e.Value)
					if errstr == "" && sp != nil && sp.Code != "nil" { // script initialized successfully
						//sp.Index = qu.IntAll(key)
						if info["createuser"] != "" {
							sp.UserName = info["createuser"]
						}
						if info["createuseremail"] != "" {
							sp.UserEmail = info["createuseremail"]
						}
						sp.MUserName = info["modifyuser"]
						sp.MUserEmail = info["modifyemail"]
						Allspiders.Store(code, sp)
						sp.StartJob()
					} else {
						nowT := time.Now().Unix()
						username := "异常"
						if sp != nil {
							username = sp.MUserName
						}
						mgu.Update("spider_loadfail", "spider", "spider",
							map[string]interface{}{
								"code": code,
								"modifytime": map[string]interface{}{
									"$gte": nowT - 12*3600,
									"$lte": nowT + 12*3600,
								},
							},
							map[string]interface{}{
								"$set": map[string]interface{}{
									"code":       code,
									"type":       "初始化",
									"script":     script,
									"updatetime": nowT,
									"modifyuser": username,
									"event":      util.Config.Uploadevent,
									"err":        errstr,
								},
							}, true, false)
					}
					if sp != nil && sp.IsHistoricalMend { // historical-mend spiders run once and are then removed
						DelLen++
						LoopListPath.Delete(key)
						b := mgu.Update("luaconfig", "editor", "editor", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"state": 6}}, false, false)
						logger.Debug("Delete History Code:", code, b)
					}
				}
				listLen++
			} else {
				logger.Info("Code:", key, "Is Not Download List")
				listNoLen++
			}
			time.Sleep(1 * time.Second)
			return true
		})
		time.Sleep(1 * time.Second)
		count_ok, count_no := 0, 0
		LoopListPath.Range(func(k, v interface{}) bool {
			if v != nil {
				count_ok++
			} else {
				count_no++
			}
			return true
		})
		InitCount = count_ok
		logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环,列表长度,", listLen, listNoLen, "删除数量", DelLen, "执行完毕后数量统计:", count_ok, count_no)
	}
}
// QueueUpScriptDetail downloads detail-page (third-level) data in queue (energy-saving) mode.
func QueueUpScriptDetail() {
	logger.Info("节能模式三级页")
	chanSize := util.Config.DetailChansize
	CC2 = make(chan *lua.LState, chanSize)
	for i := 0; i < chanSize; i++ { // two spider pools (Allspiders, Allspiders2) are initialized, so threads are doubled
		CC2 <- lua.NewState(lua.Options{
			RegistrySize:        256 * 20,
			CallStackSize:       256,
			IncludeGoStackTrace: false,
		})
	}
	for {
		count_ok, count_no := 0, 0
		logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环", "初始化脚本数量:", InitCount)
		LoopListPath.Range(func(key, temp interface{}) bool {
			if info, ok := temp.(map[string]string); ok {
				count_ok++
				code := info["code"]
				old_is_running := false
				tmp, b := Allspiders2.Load(code)
				if b {
					if sp_old, ok := tmp.(*Spider); ok {
						if !sp_old.Stop {
							old_is_running = true
						}
					}
				}
				logger.Info("Code:", code, "Is Downloading Detail:", old_is_running)
				if !old_is_running { // only start the spider if it is not already running
					script := info["script"]
					sp, errstr := NewSpider_New(code, script, true)
					if errstr == "" && sp != nil && sp.Code != "nil" { // script initialized successfully
						//sp.Index = qu.IntAll(key)
						if info["createuser"] != "" {
							sp.UserName = info["createuser"]
						}
						if info["createuseremail"] != "" {
							sp.UserEmail = info["createuseremail"]
						}
						sp.MUserName = info["modifyuser"]
						sp.MUserEmail = info["modifyemail"]
						Allspiders2.Store(code, sp)
						go sp.DownloadListDetail() // download detail-page data
					}
				}
			} else {
				logger.Info("Code:", key, "Is Not Download Detail")
				count_no++
			}
			time.Sleep(1 * time.Second)
			return true
		})
		InitCount = count_ok
		time.Sleep(1 * time.Second)
		logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环完毕,数量统计:", count_ok, count_no)
	}
}
// getSpiderScriptDB loads spider scripts from the database; code=="all" loads every script for this event.
func getSpiderScriptDB(code string) map[string]map[string]string {
	scriptSpider := map[string]map[string]string{}
	query := ``
	if code == "all" { // initialize all scripts
		query = `{"state":5,"event":` + fmt.Sprint(util.Config.Uploadevent) + `}`
	} else { // online upload triggered by a message
		query = `{"code":"` + code + `","event":` + fmt.Sprint(util.Config.Uploadevent) + `}`
		//query = `{"$or":[{"iupload":1},{"iupload":3}],"event":` + fmt.Sprint(util.Config.Uploadevent) + `,"modifytime":{"$gt":1502937042}}`
	}
	listdb := mgu.Find("luaconfig", "editor", "editor", query, `{"_id":-1}`, nil, false, -1, -1)
	// temporary: historical attachments
	//listdb := mgu.Find("luaconfig_test", "editor", "editor", query, `{"_id":-1}`, nil, false, -1, -1)
	for _, v := range *listdb {
		old := qu.IntAll(v["old_lua"])
		script := ""
		if old == 1 {
			script = fmt.Sprint(v["luacontent"])
		} else {
			if v["oldlua"] != nil {
				if v["luacontent"] != nil {
					script = v["luacontent"].(string)
				}
			} else {
				script = GetScriptByTmp(v)
			}
		}
		scriptSpider[fmt.Sprint(v["code"])] = map[string]string{
			"code":            fmt.Sprint(v["code"]),
			"type":            fmt.Sprint(v["state"]),
			"script":          script,
			"createuser":      fmt.Sprint(v["createuser"]),
			"createuseremail": fmt.Sprint(v["createuseremail"]),
			"modifyuser":      fmt.Sprint(v["modifyuser"]),
			"modifyemail":     fmt.Sprint(v["next"]),
		}
	}
	return scriptSpider
}
// getSpiderScriptFile loads spider scripts from .lua files under res; when newscript is true,
// only files modified within the last 15 minutes are loaded.
func getSpiderScriptFile(newscript bool) map[string]map[string]string {
	scriptSpider := map[string]map[string]string{}
	filespider := 0
	filepath.Walk("res", func(path string, info os.FileInfo, err error) error {
		if info.IsDir() {
			return nil
		} else if strings.HasPrefix(info.Name(), "spider_") &&
			strings.HasSuffix(info.Name(), ".lua") {
			// skip the test directory
			if strings.Contains(path, "\\test\\") {
				return nil
			}
			loadfile := true
			if newscript {
				if time.Now().Unix() < info.ModTime().Add(time.Duration(15)*time.Minute).Unix() {
					loadfile = true
				} else {
					loadfile = false
				}
			}
			if loadfile {
				f, err := os.Open(path)
				if err != nil { // check the error before deferring Close, otherwise f may be nil
					logger.Error(err.Error())
					return nil
				}
				defer f.Close()
				buf := bufio.NewReader(f)
				script := ""
				code := ""
				for {
					line, err := buf.ReadString('\n')
					if code == "" && strings.Contains(line, "spiderCode=") {
						res := regcode.FindAllStringSubmatch(line, -1)
						if len(res) > 0 {
							code = res[0][1]
							//logger.Info("code", code)
						} else {
							break
						}
					}
					if scriptSpider[code] == nil {
						script = script + line + "\n"
					} else {
						break
					}
					if err != nil {
						break
					}
				}
				if code != "" && script != "" && scriptSpider[code] == nil {
					scriptSpider[code] = map[string]string{
						"code":   code,
						"type":   "5",
						"script": script,
						// attribute values are empty for file-based scripts
						"createuser":      "",
						"createuseremail": "",
						"modifyuser":      "",
						"modifyemail":     "",
					}
					filespider = filespider + 1
					//logger.Info("script", script)
				}
			}
		}
		return nil
	})
	logger.Info("节点", util.Config.Uploadevent, "脚本文件爬虫数", filespider)
	return scriptSpider
}
// UpdateSpiderByCodeState takes a script offline, puts it online, or reloads it:
// any state other than "5"/"-1" means offline, "-1" means re-collect (reload the
// online spider), and "5" means online.
func UpdateSpiderByCodeState(code, state string) (bool, error) {
	up := false
	var err error
	if state != "5" && state != "-1" { // take the script offline
		SpiderHeart.Delete(code) // remove the script's heartbeat when it goes offline
		logger.Info("下架脚本", code)
		if util.Config.Working == 1 { // queue mode
			for i, as := range []*sync.Map{&Allspiders, &Allspiders2} { // pointers: a sync.Map must not be copied
				if i == 1 && util.Config.Modal == 0 { // in queue/original mode Allspiders2 is unused (7700 offline spiders)
					continue
				}
				tmp, b := as.Load(code)
				if b {
					sp, ok := tmp.(*Spider)
					if ok {
						if !sp.Stop { // spider is still running; stop it
							sp.Stop = true
						}
					}
					as.Delete(code)
					logger.Info("下架脚本,Allspiders删除")
				}
			}
			//LoopListPath.Range(func(k, v interface{}) bool {
			//	//if v != nil {
			//	//	info, _ := v.(map[string]string)
			//	//	if info["code"] == code {
			//	//		LoopListPath.Store(k, nil)
			//	//		lock.Lock()
			//	//		defer lock.Unlock()
			//	//		ChanDels[qu.IntAll(k)] = code
			//	//		logger.Info("下架脚本,LoopListPath更新为nil,ChanDels中位置:", k)
			//	//	}
			//	//}
			//	if k == code {
			//		LoopListPath.Delete(k)
			//		logger.Info(code, "脚本下架成功")
			//		return false //跳出循环
			//	}
			//	return true
			//})
		} else { // high-performance mode
			for _, as := range []*sync.Map{&Allspiders, &Allspiders2} {
				if tmp, ok := as.Load(code); ok {
					sp, ok := tmp.(*Spider)
					if ok {
						sp.Stop = true
						sp.L.Close()
						as.Delete(code)
					}
				}
			}
		}
		LoopListPath.Delete(code)
		logger.Info(code, "脚本下架成功")
		up = true
		err = nil
	} else if state == "-1" { // re-collect: update the online spider
		scriptMap := getSpiderScriptDB(code)
		logger.Info("更新线上脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
		if util.Config.Working == 1 { // queue mode
			for _, v := range scriptMap {
				listsize := 0
				listHas := false
				count_ok, count_no := 0, 0
				LoopListPath.Range(func(key, val interface{}) bool {
					listsize++
					if tmp, ok := val.(map[string]string); ok {
						count_ok++
						if tmp["code"] == code && key == code { // already queued: reload the script
							logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
							listHas = true
							LoopListPath.Store(key, v)
							UpdateHighListDataByCode(code) // after the spider is updated and online, reset list data to state=0
							logger.Info("队列模式更新列表页信息状态", code)
						}
					} else {
						count_no++
					}
					return true
				})
				logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
				if !listHas { // not in the queue
					logger.Info("重采更新爬虫失败:", code)
					up = false
					err = errors.New("爬虫不在线:" + code)
				} else {
					up = true
					err = nil
					logger.Info("重采更新爬虫成功", code)
				}
			}
		} else {
			for k, v := range scriptMap {
				if spd, ok := Allspiders.Load(k); ok { // script already exists: update it
					sp := spd.(*Spider)
					sp.ScriptFile = v["script"]
					if v["createuser"] != "" {
						sp.UserName = v["createuser"]
					}
					if v["createuseremail"] != "" {
						sp.UserEmail = v["createuseremail"]
					}
					sp.MUserName = v["modifyuser"]
					sp.MUserEmail = v["modifyemail"]
					Allspiders.Store(k, sp)
					up = true
					err = nil
					logger.Info("重采更新爬虫成功", sp.Code)
				} else { // not present
					up = false
					err = errors.New("爬虫不在线:" + code)
					logger.Info("重采更新爬虫失败:", code)
				}
				//Allspiders2
				if spd2, ok2 := Allspiders2.Load(k); ok2 { // script already exists: update it
					sp2 := spd2.(*Spider)
					sp2.ScriptFile = v["script"]
					if v["createuser"] != "" {
						sp2.UserName = v["createuser"]
					}
					if v["createuseremail"] != "" {
						sp2.UserEmail = v["createuseremail"]
					}
					sp2.MUserName = v["modifyuser"]
					sp2.MUserEmail = v["modifyemail"]
					sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true) // reload the script on update
					Allspiders2.Store(k, sp2)
					// up = true
					// err = nil
					logger.Info("Allspiders2重采更新爬虫成功", sp2.Code)
				} else { // not present
					// up = false
					// err = errors.New("爬虫不在线:" + code)
					logger.Info("Allspiders2重采更新爬虫失败:", code)
				}
			}
		}
	} else { // put the script online
		scriptMap := getSpiderScriptDB(code)
		logger.Info("上架新增脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
		if util.Config.Working == 1 { // queue mode
			for _, v := range scriptMap {
				listsize := 0
				listHas := false
				count_ok, count_no := 0, 0
				LoopListPath.Range(func(key, val interface{}) bool {
					listsize++
					if tmp, ok := val.(map[string]string); ok { // the assertion only serves to count count_ok; checking key == code would suffice
						count_ok++
						if tmp["code"] == code && code == key { // already queued: reload the script
							logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
							listHas = true
							LoopListPath.Store(key, v)
							UpdateHighListDataByCode(code) // after the spider is updated and online, reset list data to state=0
							logger.Info("队列模式更新列表页信息状态", code)
						}
					} else {
						count_no++
					}
					return true
				})
				logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
				if !listHas { // not in the queue: add it
					logger.Info("上架新增脚本,队列中不存在")
					LoopListPath.Store(code, v) // put online
					// lock.Lock()
					// defer lock.Unlock()
					// if len(ChanDels) > 0 {
					// 	for i, _ := range ChanDels {
					// 		logger.Info("上架新增脚本,替补队列中位置", i)
					// 		LoopListPath.Store(i, v)
					// 		delete(ChanDels, i)
					// 		break
					// 	}
					// } else {
					// 	logger.Info("上架新增脚本,新增队列中位置", listsize)
					// 	LoopListPath.Store(listsize, v) //上架
					// }
					// verify that the script actually went online
					saveList := false // whether going online succeeded
					listsize, count_ok, count_no = 0, 0, 0
					LoopListPath.Range(func(key, val interface{}) bool {
						listsize++
						if tmp, ok := val.(map[string]string); ok {
							count_ok++
							if tmp["code"] == code && key == code { // found in the queue
								saveList = true
								logger.Info("上架脚本成功", code)
							}
						} else {
							count_no++
						}
						return true
					})
					logger.Info("上架爬虫后队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
					if !saveList { // going online failed
						logger.Info("上架脚本", code, " 失败")
						return false, errors.New("use " + code + " failed")
					}
				}
				logger.Info("上架新增脚本", code)
				up = true
			}
		} else {
			for k, v := range scriptMap {
				LoopListPath.Store(k, v)
				// 1. Allspiders (events 7000/7100/7400): list-page spider pool; going online starts downloading
				if spd, ok := Allspiders.Load(k); ok { // script already exists: update it
					sp := spd.(*Spider)
					sp.ScriptFile = v["script"]
					if v["createuser"] != "" {
						sp.UserName = v["createuser"]
					}
					if v["createuseremail"] != "" {
						sp.UserEmail = v["createuseremail"]
					}
					sp.MUserName = v["modifyuser"]
					sp.MUserEmail = v["modifyemail"]
					//sp.LoadScript(k, sp.ScriptFile, true) //更新上架,重载脚本
					Allspiders.Store(k, sp)
					up = true
					err = nil
					logger.Info("上架重载脚本", sp.Code)
				} else { // new script
					sp, errstr := NewSpider(k, v["script"])
					if errstr == "" && sp != nil && sp.Code != "nil" {
						if v["createuser"] != "" {
							sp.UserName = v["createuser"]
						}
						if v["createuseremail"] != "" {
							sp.UserEmail = v["createuseremail"]
						}
						sp.MUserName = v["modifyuser"]
						sp.MUserEmail = v["modifyemail"]
						Allspiders.Store(k, sp)
						sp.Stop = false
						sp.StartJob()
						up = true
						err = nil
						logger.Info("上架新增脚本", sp.Code)
					} else {
						err = errors.New("新增失败")
						nowT := time.Now().Unix()
						username := "异常"
						if sp != nil { // guard against a failed load returning a nil spider
							username = sp.MUserName
						}
						mgu.Update("spider_loadfail", "spider", "spider",
							map[string]interface{}{
								"code": k,
								"modifytime": map[string]interface{}{
									"$gte": nowT - 12*3600,
									"$lte": nowT + 12*3600,
								},
							},
							map[string]interface{}{
								"$set": map[string]interface{}{
									"code":       k,
									"type":       "新增初始化脚本",
									"script":     v["script"],
									"updatetime": nowT,
									"modifyuser": username,
									"event":      util.Config.Uploadevent,
									"err":        errstr,
								},
							}, true, false)
					}
				}
				// 2. Allspiders2 (events 7100/7110/7400): detail-page spider pool
				if util.Config.Modal == 1 { // the old high-performance mode does not collect detail pages from list data (7000/7410)
					//Allspiders2
					if spd2, ok2 := Allspiders2.Load(k); ok2 { // script already exists: update it
						sp2 := spd2.(*Spider)
						sp2.ScriptFile = v["script"]
						if v["createuser"] != "" {
							sp2.UserName = v["createuser"]
						}
						if v["createuseremail"] != "" {
							sp2.UserEmail = v["createuseremail"]
						}
						sp2.MUserName = v["modifyuser"]
						sp2.MUserEmail = v["modifyemail"]
						sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true) // reload the script on update
						Allspiders2.Store(k, sp2)   // store back after reload
						UpdateHighListDataByCode(k) // after the spider is updated and online, reset list data to state=0
						// up = true
						// err = nil
						logger.Info("Allspiders2上架重载脚本", sp2.Code)
					} else { // new script
						sp2, errstr := NewSpider(k, v["script"])
						if errstr == "" && sp2 != nil && sp2.Code != "nil" {
							if v["createuser"] != "" {
								sp2.UserName = v["createuser"]
							}
							if v["createuseremail"] != "" {
								sp2.UserEmail = v["createuseremail"]
							}
							sp2.MUserName = v["modifyuser"]
							sp2.MUserEmail = v["modifyemail"]
							sp2.Stop = false
							go sp2.DownloadHighDetail() // download detail pages based on list-page data
							Allspiders2.Store(k, sp2)
							// up = true
							// err = nil
							logger.Info("Allspiders2上架新增脚本", sp2.Code)
						} /*else {
							err = errors.New("新增失败")
							mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
								"code": k,
								"type": "新增脚本失败",
								"script": v["script"],
								"intime": time.Now().Format(qu.Date_Full_Layout),
								"event": util.Config.Uploadevent,
							})
						}*/
					}
				}
			}
		}
	}
	logger.Info("上下架:", up, err)
	return up, err
}
// ReloadSpiderFile periodically reloads recently changed script files.
func ReloadSpiderFile() {
	scriptMap := getSpiderScriptFile(true)
	for k, v := range scriptMap {
		for i, as := range []*sync.Map{&Allspiders, &Allspiders2} { // pointers: a sync.Map must not be copied
			if i == 1 && util.Config.Modal == 0 { // in queue/original mode Allspiders2 is unused
				continue
			}
			if spd, ok := as.Load(k); ok { // script already exists: update it
				sp := spd.(*Spider)
				logger.Info("定时重载脚本", sp.Code)
				sp.ScriptFile = v["script"]
				if v["createuser"] != "" {
					sp.UserName = v["createuser"]
				}
				if v["createuseremail"] != "" {
					sp.UserEmail = v["createuseremail"]
				}
				sp.MUserName = v["modifyuser"]
				sp.MUserEmail = v["modifyemail"]
				as.Store(k, sp)
			} else { // new script
				var sp *Spider
				var errstr string
				if util.Config.Working == 1 { // queue mode
					if i == 0 {
						//length := 0
						//LoopListPath.Range(func(k, v interface{}) bool {
						//	length++
						//	return true
						//})
						LoopListPath.Store(k, v) // in queue mode Allspiders and Allspiders2 share one LoopListPath, so add it only once
						sp, errstr = NewSpider_New(k, v["script"], false)
					} else {
						sp, errstr = NewSpider_New(k, v["script"], true)
					}
				} else {
					sp, errstr = NewSpider(k, v["script"])
				}
				if errstr == "" && sp != nil && sp.Code != "nil" {
					if v["createuser"] != "" {
						sp.UserName = v["createuser"]
					}
					if v["createuseremail"] != "" {
						sp.UserEmail = v["createuseremail"]
					}
					sp.MUserName = v["modifyuser"]
					sp.MUserEmail = v["modifyemail"]
					as.Store(k, sp)
					if util.Config.Working == 1 {
						sp.Stop = true
						// if i == 0 {
						// 	length := 0
						// 	LoopListPath.Range(func(k, v interface{}) bool {
						// 		length++
						// 		return true
						// 	})
						// 	LoopListPath.Store(length, v)
						// }
					} else {
						sp.Stop = false
						if i == 0 { // in high-performance mode only Allspiders starts jobs; Allspiders2 only downloads detail pages
							sp.StartJob()
						}
					}
					logger.Info("定时重载脚本--新增", sp.Code)
				} else {
					if i == 0 {
						nowT := time.Now().Unix()
						username := "异常"
						if sp != nil { // guard against a failed load returning a nil spider
							username = sp.MUserName
						}
						mgu.Update("spider_loadfail", "spider", "spider",
							map[string]interface{}{
								"code": k,
								"modifytime": map[string]interface{}{
									"$gte": nowT - 12*3600,
									"$lte": nowT + 12*3600,
								},
							},
							map[string]interface{}{
								"$set": map[string]interface{}{
									"code":       k,
									"type":       "定时重载--新增失败",
									"script":     v["script"],
									"updatetime": nowT,
									"modifyuser": username,
									"event":      util.Config.Uploadevent,
									"err":        errstr,
								},
							}, true, false)
					}
				}
			}
		}
		// if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
		// 	sp := spd.(*Spider)
		// 	logger.Info("定时重载脚本", sp.Code)
		// 	sp.ScriptFile = v["script"]
		// 	if v["createuser"] != "" {
		// 		sp.UserName = v["createuser"]
		// 	}
		// 	if v["createuseremail"] != "" {
		// 		sp.UserEmail = v["createuseremail"]
		// 	}
		// 	sp.MUserName = v["modifyuser"]
		// 	sp.MUserEmail = v["modifyemail"]
		// 	Allspiders.Store(k, sp)
		// } else { //新增脚本
		// 	var sp *Spider
		// 	if util.Config.Working == 1 { //排队模式
		// 		length := 0
		// 		LoopListPath.Range(func(k, v interface{}) bool {
		// 			length++
		// 			return true
		// 		})
		// 		LoopListPath.Store(length, v)
		// 		sp = NewSpider_New(k, v["script"], false)
		// 	} else {
		// 		sp = NewSpider(k, v["script"])
		// 	}
		// 	if sp != nil && sp.Code != "nil" {
		// 		if v["createuser"] != "" {
		// 			sp.UserName = v["createuser"]
		// 		}
		// 		if v["createuseremail"] != "" {
		// 			sp.UserEmail = v["createuseremail"]
		// 		}
		// 		sp.MUserName = v["modifyuser"]
		// 		sp.MUserEmail = v["modifyemail"]
		// 		Allspiders.Store(k, sp)
		// 		if util.Config.Working == 1 {
		// 			sp.Stop = true
		// 			length := 0
		// 			LoopListPath.Range(func(k, v interface{}) bool {
		// 				length++
		// 				return true
		// 			})
		// 			LoopListPath.Store(length, v)
		// 		} else {
		// 			sp.Stop = false
		// 			sp.StartJob()
		// 		}
		// 		logger.Info("定时重载脚本--新增", sp.Code)
		// 	} else {
		// 		mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
		// 			"code": k,
		// 			"type": "定时重载--新增失败",
		// 			"script": v["script"],
		// 			"intime": time.Now().Format(qu.Date_Full_Layout),
		// 			"event": util.Config.Uploadevent,
		// 		})
		// 	}
		// }
	}
	util.TimeAfterFunc(time.Duration(15)*time.Minute, ReloadSpiderFile, TimeChan)
}
// NewSpider_New builds a spider in queue mode; download counters are copied from
// the existing in-memory spider (if any) instead of being re-read from the database.
func NewSpider_New(code, luafile string, newstate bool) (*Spider, string) {
	defer mu.Catch()
	spider := &Spider{}
	err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate)
	if err != "" {
		return nil, err
	}
	spider.Code = spider.GetVar("spiderCode")
	spider.Script.SCode = spider.Code
	spider.Name = spider.GetVar("spiderName")
	spider.Channel = spider.GetVar("spiderChannel")
	//spider.LastExecTime = GetLastExectime(spider.Code)
	spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
	spider.Collection = spider.GetVar("spider2Collection")
	spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
	spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
	spider.StoreMode = spider.GetIntVar("spiderStoreMode")
	spider.CoverAttr = spider.GetVar("spiderCoverAttr")
	spiderSleepBase := spider.GetIntVar("spiderSleepBase")
	if spiderSleepBase == -1 {
		spider.SleepBase = 1000
	} else {
		spider.SleepBase = spiderSleepBase
	}
	spiderSleepRand := spider.GetIntVar("spiderSleepRand")
	if spiderSleepRand == -1 {
		spider.SleepRand = 1000
	} else {
		spider.SleepRand = spiderSleepRand
	}
	spiderTimeout := spider.GetIntVar("spiderTimeout")
	if spiderTimeout == -1 {
		spider.Timeout = 60
	} else {
		spider.Timeout = int64(spiderTimeout)
	}
	spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
	if v, ok := Allspiders.Load(spider.Code); ok {
		sp := v.(*Spider)
		spider.TodayDowncount = sp.TodayDowncount
		spider.ToDayRequestNum = sp.ToDayRequestNum
		spider.YesterdayDowncount = sp.YesterdayDowncount
		spider.YestoDayRequestNum = sp.YestoDayRequestNum
		spider.TotalDowncount = sp.TotalDowncount
		spider.TotalRequestNum = sp.TotalRequestNum
		spider.ErrorNum = sp.ErrorNum
		spider.RoundCount = sp.RoundCount
	}
	spider.UserName = spider.GetVar("spiderUserName")
	spider.UserEmail = spider.GetVar("spiderUserEmail")
	spider.UploadTime = spider.GetVar("spiderUploadTime")
	// historical mend
	spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
	spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
	// new vs. old spider
	spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
	return spider, ""
}
// NewSpider builds a spider in high-performance mode; download counters are
// restored from the latest download log.
func NewSpider(code, luafile string) (*Spider, string) {
	defer mu.Catch()
	spider := &Spider{}
	err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true)
	if err != "" {
		return nil, err
	}
	spider.Code = spider.GetVar("spiderCode")
	spider.SCode = spider.Code
	spider.Name = spider.GetVar("spiderName")
	spider.Channel = spider.GetVar("spiderChannel")
	//spider.LastExecTime = GetLastExectime(spider.Code)
	spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
	spider.Collection = spider.GetVar("spider2Collection")
	spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
	//spider.Thread = int64(spider.GetIntVar("spiderThread"))
	spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
	spider.StoreMode = spider.GetIntVar("spiderStoreMode")
	spider.CoverAttr = spider.GetVar("spiderCoverAttr")
	spiderSleepBase := spider.GetIntVar("spiderSleepBase")
	if spiderSleepBase == -1 {
		spider.SleepBase = 1000
	} else {
		spider.SleepBase = spiderSleepBase
	}
	spiderSleepRand := spider.GetIntVar("spiderSleepRand")
	if spiderSleepRand == -1 {
		spider.SleepRand = 1000
	} else {
		spider.SleepRand = spiderSleepRand
	}
	spiderTimeout := spider.GetIntVar("spiderTimeout")
	if spiderTimeout == -1 {
		spider.Timeout = 60
	} else {
		spider.Timeout = int64(spiderTimeout)
	}
	spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
	date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
	tmp := GetDownloadLast(spider.Code, date)
	if len(tmp) > 0 {
		spider.TodayDowncount = int32(qu.IntAll(tmp["todaydowncount"]))
		spider.ToDayRequestNum = int32(qu.IntAll(tmp["todaydownreq"]))
		spider.YesterdayDowncount = int32(qu.IntAll(tmp["yesdowncount"]))
		spider.YestoDayRequestNum = int32(qu.IntAll(tmp["yesdownreq"]))
		spider.TotalDowncount = spider.TodayDowncount + int32(qu.IntAll(tmp["totaldown"]))
		spider.TotalRequestNum = spider.ToDayRequestNum + int32(qu.IntAll(tmp["totalreq"]))
	}
	spider.UserName = spider.GetVar("spiderUserName")
	spider.UserEmail = spider.GetVar("spiderUserEmail")
	spider.UploadTime = spider.GetVar("spiderUploadTime")
	// historical mend
	//qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
	spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
	spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
	// new vs. old spider
	spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
	return spider, ""
}
// SaveDownCount persists download counters; when addtotal is true, the daily
// counts are also added to the running totals.
func SaveDownCount(code string, addtotal bool, todayDowncount, todayRequestNum, yesterdayDowncount, yestoDayRequestNum int32) {
	date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
	updata := M{}
	if addtotal {
		updata = M{
			"$inc": M{"totaldown": todayDowncount, "totalreq": todayRequestNum},
			"$set": M{
				"yesdowncount":   yesterdayDowncount,
				"yesdownreq":     yestoDayRequestNum,
				"todaydowncount": todayDowncount,
				"todaydownreq":   todayRequestNum,
				"date":           date,
				"year":           time.Now().Year(),
				"month":          time.Now().Month(),
				"day":            time.Now().Day(),
			},
		}
	} else {
		updata = M{
			"$set": M{
				"yesdowncount":   yesterdayDowncount,
				"yesdownreq":     yestoDayRequestNum,
				"todaydowncount": todayDowncount,
				"todaydownreq":   todayRequestNum,
				"date":           date,
				"year":           time.Now().Year(),
				"month":          time.Now().Month(),
				"day":            time.Now().Day(),
			},
		}
	}
	mgu.Update("spider_downlog", "spider", "spider", M{"code": code, "date": date}, updata, true, false)
}
// GetLimitDownload returns the download upper/lower limits for a code (unused).
func GetLimitDownload(code string) (uplimit, lowlimit int) {
	defer mu.Catch()
	ret := mgu.FindOne("spider_ldtime", "spider", "spider", `{"code":"`+code+`"}`)
	if *ret != nil {
		uplimit = qu.IntAll((*ret)["uplimit"])
		lowlimit = qu.IntAll((*ret)["lowlimit"])
		return uplimit, lowlimit
	} else {
		return 100, 0
	}
}
// GetScriptByTmp assembles a full Lua script from the template parts stored in luaconfig.
func GetScriptByTmp(luaconfig map[string]interface{}) string {
	defer mu.Catch()
	script := ""
	if luaconfig["listcheck"] == nil {
		luaconfig["listcheck"] = ""
	}
	if luaconfig["contentcheck"] == nil {
		luaconfig["contentcheck"] = ""
	}
	if luaconfig != nil && len(luaconfig) > 0 {
		common := luaconfig["param_common"].([]interface{})
		// append the newer spiderIsHistoricalMend / spiderIsMustDownload parameters
		if len(common) == 15 {
			common = append(common, "", "", "")
		} else {
			common = append(common, false, false, "", "", "")
		}
		for k, v := range common {
			if k == 4 || k == 5 || k == 6 || k == 9 || k == 10 {
				common[k] = qu.IntAll(v)
			}
		}
		script, _ = GetTmpModel(map[string][]interface{}{"common": common})
		script_time := ""
		if luaconfig["type_time"] == 0 {
			time := luaconfig["param_time"].([]interface{})
			script_time, _ = GetTmpModel(map[string][]interface{}{
				"time": time,
			})
		} else {
			script_time = luaconfig["str_time"].(string)
		}
		script_list := ""
		if luaconfig["type_list"] == 0 {
			list := luaconfig["param_list"].([]interface{})
			addrs := strings.Split(list[1].(string), "\n")
			if len(addrs) > 0 {
				for k, v := range addrs {
					addrs[k] = "'" + v + "'"
				}
				list[1] = strings.Join(addrs, ",")
			} else {
				list[1] = ""
			}
			script_list, _ = GetTmpModel(map[string][]interface{}{
				"list":      list,
				"listcheck": []interface{}{luaconfig["listcheck"]},
			})
		} else {
			script_list = luaconfig["str_list"].(string)
		}
		script_content := ""
		if luaconfig["type_content"] == 0 {
			content := luaconfig["param_content"].([]interface{})
			script_content, _ = GetTmpModel(map[string][]interface{}{
				"content":      content,
				"contentcheck": []interface{}{luaconfig["contentcheck"]},
			})
		} else {
			script_content = luaconfig["str_content"].(string)
		}
		script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"], luaconfig["spidercompete"])
		script += "\n" + script_time + "\n" + script_list + "\n" + script_content
		script = ReplaceModel(script, common, luaconfig["model"].(map[string]interface{}))
	}
	return script
}
// GetTmpModel renders spider-script fragments from template parameters.
func GetTmpModel(param map[string][]interface{}) (script string, err interface{}) {
	qu.Try(func() {
		if param != nil && param["common"] != nil {
			if len(param["common"]) < 12 {
				err = "公共参数配置不全"
			} else {
				script = fmt.Sprintf(util.Tmp_common, param["common"]...)
			}
		}
		if param != nil && param["time"] != nil {
			if len(param["time"]) < 3 {
				err = "方法:time-参数配置不全"
			} else {
				script += fmt.Sprintf(util.Tmp_pubtime, param["time"]...)
			}
		}
		if param != nil && param["list"] != nil {
			if len(param["list"]) < 7 {
				err = "方法:list-参数配置不全"
			} else {
				list := []interface{}{param["listcheck"][0]}
				list = append(list, param["list"]...)
				script += fmt.Sprintf(util.Tmp_pagelist, list...)
				script = strings.Replace(script, "#pageno#", `"..tostring(pageno).."`, -1)
			}
		}
		if param != nil && param["content"] != nil {
			if len(param["content"]) < 2 {
				err = "方法:content-参数配置不全"
			} else {
				content := []interface{}{param["contentcheck"][0]}
				content = append(content, param["content"]...)
				script += fmt.Sprintf(util.Tmp_content, content...)
			}
		}
	}, func(e interface{}) {
		err = e
	})
	return script, err
}
// ReplaceModel fills the common and model placeholders into a generated script.
func ReplaceModel(script string, comm []interface{}, model map[string]interface{}) string {
	defer mu.Catch()
	// fill in common fields
	commstr := `item["spidercode"]="` + comm[0].(string) + `";`
	commstr += `item["site"]="` + comm[1].(string) + `";`
	commstr += `item["channel"]="` + comm[2].(string) + `";`
	script = strings.Replace(script, "--Common--", commstr, -1)
	// fill in model fields
	modelstr := ""
	for k, v := range model {
		modelstr += `item["` + k + `"]="` + v.(string) + `";`
	}
	script = strings.Replace(script, "--Model--", modelstr, -1)
	return script
}
// SpiderInfoSend reports spider statistics to the editor service (heartbeat) and reschedules itself.
func SpiderInfoSend() {
	time.Sleep(15 * time.Second)
	list := []interface{}{}
	Allspiders.Range(func(key, value interface{}) bool {
		v := value.(*Spider)
		info := map[string]interface{}{}
		info["code"] = v.Code
		info["todayDowncount"] = v.TodayDowncount
		info["toDayRequestNum"] = v.ToDayRequestNum
		info["yesterdayDowncount"] = v.YesterdayDowncount
		info["yestoDayRequestNum"] = v.YestoDayRequestNum
		info["totalDowncount"] = v.TotalDowncount
		info["totalRequestNum"] = v.TotalRequestNum
		info["errorNum"] = v.ErrorNum
		info["roundCount"] = v.RoundCount
		info["runRate"] = v.SpiderRunRate
		info["lastHeartbeat"] = v.LastHeartbeat
		info["lastDowncount"] = v.LastDowncount
		info["lstate"] = v.L.Status(v.L)
		list = append(list, info)
		return true
	})
	bs, _ := json.Marshal(list)
	value := url.Values{
		"data": []string{util.Se.EncodeString(string(bs))},
		"type": []string{"info"},
	}
	res, err := http.PostForm(util.Config.Editoraddr, value)
	if err != nil {
		logger.Error("send to editor: ", err.Error())
	} else if res != nil {
		res.Body.Close() // avoid leaking the response body
	}
	util.TimeAfterFunc(5*time.Minute, SpiderInfoSend, TimeChan)
}
// SaveHeartInfo persists heartbeat info for every spider and reschedules itself.
func SaveHeartInfo() {
	time.Sleep(30 * time.Second)
	num := 0
	SpiderHeart.Range(func(key, value interface{}) bool {
		code := key.(string)
		heart, ok := value.(*Heart)
		if ok {
			num++
			update := []map[string]interface{}{}
			update = append(update, map[string]interface{}{"code": code})
			update = append(update, map[string]interface{}{"$set": map[string]interface{}{
				"site":          heart.Site,
				"channel":       heart.Channel,
				"list":          heart.ListHeart,
				"findlist":      heart.FindListHeart,
				"detail":        heart.DetailHeart,
				"detailexecute": heart.DetailExecuteHeart,
				"modifyuser":    heart.ModifyUser,
				"event":         util.Config.Uploadevent,
				"updatetime":    time.Now().Unix(),
				"del":           false,
			}})
			UpdataHeartCache <- update
		}
		return true
	})
	logger.Info("更新心跳个数:", num)
	time.AfterFunc(20*time.Minute, SaveHeartInfo)
}
// SpiderCodeSendToEditor reports a spider code to the editor service (move event), retrying up to three times.
func SpiderCodeSendToEditor(code string) {
	defer qu.Catch()
	ok := false
	for i := 1; i <= 3; i++ {
		logger.Info("Code:", code, " times:", i, " Send Move Event")
		list := []interface{}{}
		list = append(list, code)
		bs, _ := json.Marshal(list)
		value := url.Values{
			"data": []string{util.Se.EncodeString(string(bs))},
			"type": []string{"code"},
		}
		res, err := http.PostForm(util.Config.Editoraddr, value)
		if err != nil {
			logger.Error("Send To Editor For Move Event Failed,Code:", code)
		} else {
			if res != nil {
				res.Body.Close()
			}
			ok = true
			break
		}
	}
	logger.Info("Code:", code, " Send Move Event:", ok)
	mgu.Save("luamovelog", "editor", "editor", map[string]interface{}{
		"code":       code,
		"comeintime": time.Now().Unix(),
		"type":       "sendfail",
		"ok":         ok,
	})
}