handler.go 43 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351
  1. package spider
  2. import (
  3. "bufio"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. mu "mfw/util"
  8. "net/http"
  9. "net/url"
  10. "os"
  11. "path/filepath"
  12. qu "qfw/util"
  13. "regexp"
  14. util "spiderutil"
  15. "strings"
  16. "sync"
  17. "time"
  18. "github.com/donnie4w/go-logger/logger"
  19. "github.com/yuin/gopher-lua"
  20. )
  21. var SpiderHeart sync.Map = sync.Map{} //爬虫心跳
  22. var Allspiders sync.Map = sync.Map{}
  23. var Allspiders2 sync.Map = sync.Map{}
  24. var LoopListPath sync.Map = sync.Map{}
  25. //var ChanDels = map[int]string{}
  26. //var lock sync.Mutex
  27. var CC chan *lua.LState
  28. var CC2 chan *lua.LState
  29. var Chansize int
  30. var regcode, _ = regexp.Compile(`="(.*)"`)
  31. var InitCount int
  32. var InitAllLuaOver = make(chan bool, 1) //所有脚本是否加载完毕
  33. func InitSpider() {
  34. scriptMap := getSpiderScriptDB("all") //加载爬虫,初始化模板
  35. scriptMapFile := getSpiderScriptFile(false)
  36. for code, v := range scriptMap {
  37. LoopListPath.Store(code, v)
  38. InitCount++
  39. }
  40. for code, v := range scriptMapFile {
  41. LoopListPath.Store(code, v)
  42. InitCount++
  43. }
  44. if util.Config.Working == 0 {
  45. NoQueueScript() //高性能模式
  46. } else {
  47. if util.Config.Modal == 0 { //原始模式
  48. QueueUpScriptList()
  49. } else { //列表页和三级页分开采集
  50. go QueueUpScriptList() //节能模式列表页
  51. go QueueUpScriptDetail() //节能模式三级页
  52. }
  53. }
  54. }
  55. //高性能模式
  56. func NoQueueScript() {
  57. list, _ := MgoS.Find("spider_ldtime", nil, nil, map[string]interface{}{"code": 1, "uplimit": 1, "lowlimit": 1}, false, -1, -1)
  58. LoopListPath.Range(func(key, temp interface{}) bool {
  59. if info, ok := temp.(map[string]string); ok {
  60. code := info["code"]
  61. script := info["script"]
  62. sp, errstr := NewSpider(code, script)
  63. if errstr == "" && sp != nil && sp.Code != "nil" { //脚本加载成功
  64. //sp.Index = qu.IntAll(key)
  65. //sp2.Index = qu.IntAll(key)
  66. if info["createuser"] != "" {
  67. sp.UserName = info["createuser"]
  68. }
  69. if info["createuseremail"] != "" {
  70. sp.UserEmail = info["createuseremail"]
  71. }
  72. sp.MUserName = info["modifyuser"]
  73. sp.MUserEmail = info["modifyemail"]
  74. Allspiders.Store(sp.Code, sp)
  75. for _, tmp := range *list {
  76. if qu.ObjToString(tmp["code"]) == sp.Code {
  77. sp.UpperLimit = qu.IntAll(tmp["uplimit"])
  78. //sp2.UpperLimit = qu.IntAll(tmp["uplimit"])
  79. sp.LowerLimit = qu.IntAll(tmp["lowlimit"])
  80. //sp2.LowerLimit = qu.IntAll(tmp["lowlimit"])
  81. break
  82. }
  83. }
  84. if util.Config.Modal == 1 { //列表页、三级页分开采集模式
  85. sp2, _ := NewSpider(code, script)
  86. sp2.UserName = sp.UserName
  87. sp2.UserEmail = sp.UserEmail
  88. sp2.MUserName = sp.MUserName
  89. sp2.MUserEmail = sp.MUserEmail
  90. sp2.IsMainThread = true //多线程采集时使用
  91. Allspiders2.Store(sp.Code, sp2)
  92. }
  93. sp.StartJob()
  94. //util.TimeSleepFunc(10*time.Millisecond, TimeSleepChan)
  95. } else {
  96. logger.Info(code, "脚本加载失败,请检查!")
  97. nowT := time.Now().Unix()
  98. username := "异常"
  99. if sp != nil {
  100. username = sp.MUserName
  101. }
  102. MgoS.Update("spider_loadfail",
  103. map[string]interface{}{
  104. "code": code,
  105. "modifytime": map[string]interface{}{
  106. "$gte": nowT - 12*3600,
  107. "$lte": nowT + 12*3600,
  108. },
  109. },
  110. map[string]interface{}{
  111. "$set": map[string]interface{}{
  112. "code": code,
  113. "type": "初始化",
  114. "script": script,
  115. "updatetime": nowT,
  116. "modifyuser": username,
  117. "event": util.Config.Uploadevent,
  118. "err": errstr,
  119. },
  120. }, true, false)
  121. }
  122. time.Sleep(100 * time.Millisecond)
  123. }
  124. return true
  125. })
  126. InitAllLuaOver <- true //爬虫初始化完毕
  127. logger.Info("高性能模式:LUA加载完成")
  128. numSpider := 0
  129. Allspiders.Range(func(key, value interface{}) bool {
  130. numSpider++
  131. return true
  132. })
  133. logger.Info("总共加载脚本数:", numSpider)
  134. }
  135. //排队模式下载列表页数据
  136. func QueueUpScriptList() {
  137. logger.Info("节能模式列表页")
  138. CC = make(chan *lua.LState, util.Config.Chansize)
  139. for i := 0; i < util.Config.Chansize; i++ { //目前初始化Allspiders,Allspiders2两个爬虫池,线程乘2
  140. CC <- lua.NewState(lua.Options{
  141. RegistrySize: 256 * 20,
  142. CallStackSize: 256,
  143. IncludeGoStackTrace: false,
  144. })
  145. }
  146. for {
  147. listLen, listNoLen, DelLen := 0, 0, 0
  148. logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环", "初始化脚本数量:", InitCount)
  149. LoopListPath.Range(func(key, temp interface{}) bool {
  150. if info, ok := temp.(map[string]string); ok {
  151. code := info["code"]
  152. old_is_running := false
  153. tmp, b := Allspiders.Load(code)
  154. if b {
  155. if sp_old, ok := tmp.(*Spider); ok {
  156. if !sp_old.Stop {
  157. old_is_running = true
  158. }
  159. }
  160. }
  161. logger.Info("Code:", code, "Is Downloading List:", old_is_running)
  162. if !old_is_running { //判断当前爬虫是否正在执行
  163. script := info["script"]
  164. sp, errstr := NewSpider_New(code, script, false)
  165. //logger.Info("初始化脚本是否成功:", sp != nil, e.Value)
  166. if errstr == "" && sp != nil && sp.Code != "nil" { //初始化脚本成功
  167. //sp.Index = qu.IntAll(key)
  168. sp.UserName = info["createuser"]
  169. sp.UserEmail = info["createuseremail"]
  170. sp.MUserName = info["modifyuser"]
  171. sp.MUserEmail = info["modifyemail"]
  172. Allspiders.Store(code, sp)
  173. sp.StartJob()
  174. } else {
  175. nowT := time.Now().Unix()
  176. username := "异常"
  177. if sp != nil {
  178. username = sp.MUserName
  179. }
  180. MgoS.Update("spider_loadfail",
  181. map[string]interface{}{
  182. "code": code,
  183. "modifytime": map[string]interface{}{
  184. "$gte": nowT - 12*3600,
  185. "$lte": nowT + 12*3600,
  186. },
  187. },
  188. map[string]interface{}{
  189. "$set": map[string]interface{}{
  190. "code": code,
  191. "type": "初始化",
  192. "script": script,
  193. "updatetime": nowT,
  194. "modifyuser": username,
  195. "event": util.Config.Uploadevent,
  196. "err": errstr,
  197. },
  198. }, true, false)
  199. }
  200. if sp != nil && sp.IsHistoricalMend { //下载历史的爬虫执行一次后删除
  201. DelLen++
  202. LoopListPath.Delete(key)
  203. MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"state": 6}}, false, false)
  204. logger.Debug("Delete History Code:", code, b)
  205. }
  206. }
  207. listLen++
  208. } else {
  209. logger.Info("Code:", key, "Is Not Download List")
  210. listNoLen++
  211. }
  212. time.Sleep(100 * time.Millisecond)
  213. return true
  214. })
  215. time.Sleep(1 * time.Second)
  216. count_ok, count_no := 0, 0
  217. LoopListPath.Range(func(k, v interface{}) bool {
  218. if v != nil {
  219. count_ok++
  220. } else {
  221. count_no++
  222. }
  223. return true
  224. })
  225. InitCount = count_ok
  226. logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环,列表长度,", listLen, listNoLen, "删除数量", DelLen, "执行完毕后数量统计:", count_ok, count_no)
  227. }
  228. }
  229. //排队模式下载三级页数据
  230. func QueueUpScriptDetail() {
  231. logger.Info("节能模式三级页")
  232. chanSize := util.Config.DetailChansize
  233. CC2 = make(chan *lua.LState, chanSize)
  234. for i := 0; i < chanSize; i++ { //目前初始化Allspiders,Allspiders2两个爬虫池,线程乘2
  235. CC2 <- lua.NewState(lua.Options{
  236. RegistrySize: 256 * 20,
  237. CallStackSize: 256,
  238. IncludeGoStackTrace: false,
  239. })
  240. }
  241. for {
  242. count_ok, count_no := 0, 0
  243. logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环", "初始化脚本数量:", InitCount)
  244. LoopListPath.Range(func(key, temp interface{}) bool {
  245. if info, ok := temp.(map[string]string); ok {
  246. count_ok++
  247. code := info["code"]
  248. old_is_running := false
  249. tmp, b := Allspiders2.Load(code)
  250. if b {
  251. if sp_old, ok := tmp.(*Spider); ok {
  252. if !sp_old.Stop {
  253. old_is_running = true
  254. }
  255. }
  256. }
  257. logger.Info("Code:", code, "Is Downloading Detail:", old_is_running)
  258. if !old_is_running { //判断当前爬虫是否正在执行
  259. script := info["script"]
  260. sp, errstr := NewSpider_New(code, script, true)
  261. if errstr == "" && sp != nil && sp.Code != "nil" { //初始化脚本成功
  262. //sp.Index = qu.IntAll(key)
  263. sp.UserName = info["createuser"]
  264. sp.UserEmail = info["createuseremail"]
  265. sp.MUserName = info["modifyuser"]
  266. sp.MUserEmail = info["modifyemail"]
  267. sp.IsMainThread = true
  268. Allspiders2.Store(code, sp)
  269. go sp.DownloadListDetail(false) //下载三级页信息
  270. }
  271. }
  272. } else {
  273. logger.Info("Code:", key, "Is Not Download Detail")
  274. count_no++
  275. }
  276. time.Sleep(100 * time.Millisecond)
  277. return true
  278. })
  279. InitCount = count_ok
  280. time.Sleep(1 * time.Second)
  281. logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环完毕,数量统计:", count_ok, count_no)
  282. }
  283. }
  284. //获取所有爬虫脚本--数据库
  285. func getSpiderScriptDB(code string) map[string]map[string]string {
  286. scriptSpider := map[string]map[string]string{}
  287. query := map[string]interface{}{}
  288. if code == "all" { //初始化所有脚本
  289. query = map[string]interface{}{"state": 5, "event": util.Config.Uploadevent}
  290. } else { //消息在线上传
  291. query = map[string]interface{}{"code": code, "event": util.Config.Uploadevent}
  292. //query = `{"$or":[{"iupload":1},{"iupload":3}],"event":` + fmt.Sprint(util.Config.Uploadevent) + `,"modifytime":{"$gt":1502937042}}`
  293. }
  294. listdb, _ := MgoEB.Find("luaconfig", query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
  295. //临时历史附件
  296. //listdb, _ := MgoEB.Find("luaconfig_test", query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
  297. for _, v := range *listdb {
  298. old := qu.IntAll(v["old_lua"])
  299. script := ""
  300. if old == 1 {
  301. script = fmt.Sprint(v["luacontent"])
  302. } else {
  303. if v["oldlua"] != nil {
  304. if v["luacontent"] != nil {
  305. script = v["luacontent"].(string)
  306. }
  307. } else {
  308. script = GetScriptByTmp(v)
  309. }
  310. }
  311. scriptSpider[fmt.Sprint(v["code"])] = map[string]string{
  312. "code": fmt.Sprint(v["code"]),
  313. "type": fmt.Sprint(v["state"]),
  314. "script": script,
  315. "createuser": fmt.Sprint(v["createuser"]),
  316. "createuseremail": fmt.Sprint(v["createuseremail"]),
  317. "modifyuser": fmt.Sprint(v["modifyuser"]),
  318. "modifyemail": fmt.Sprint(v["next"]),
  319. }
  320. }
  321. return scriptSpider
  322. }
  323. //获取所有爬虫脚本--文件
  324. func getSpiderScriptFile(newscript bool) map[string]map[string]string {
  325. scriptSpider := map[string]map[string]string{}
  326. filespider := 0
  327. filepath.Walk("res", func(path string, info os.FileInfo, err error) error {
  328. if info.IsDir() {
  329. return nil
  330. } else if strings.HasPrefix(info.Name(), "spider_") &&
  331. strings.HasSuffix(info.Name(), ".lua") {
  332. //过滤test目录
  333. if strings.Contains(path, "\\test\\") {
  334. return nil
  335. }
  336. loadfile := true
  337. if newscript {
  338. if time.Now().Unix() < info.ModTime().Add(time.Duration(15)*time.Minute).Unix() {
  339. loadfile = true
  340. } else {
  341. loadfile = false
  342. }
  343. }
  344. if loadfile {
  345. f, err := os.Open(path)
  346. defer f.Close()
  347. if err != nil {
  348. logger.Error(err.Error())
  349. }
  350. buf := bufio.NewReader(f)
  351. script := ""
  352. code := ""
  353. for {
  354. line, err := buf.ReadString('\n')
  355. if code == "" && strings.Contains(line, "spiderCode=") {
  356. res := regcode.FindAllStringSubmatch(line, -1)
  357. if len(res) > 0 {
  358. code = res[0][1]
  359. //logger.Info("code", code)
  360. } else {
  361. break
  362. }
  363. }
  364. if scriptSpider[code] == nil {
  365. script = script + line + "\n"
  366. } else {
  367. break
  368. }
  369. if err != nil {
  370. break
  371. }
  372. }
  373. if code != "" && script != "" && scriptSpider[code] == nil {
  374. scriptSpider[code] = map[string]string{
  375. "code": code,
  376. "type": "5",
  377. "script": script,
  378. //脚本文件属性值空
  379. "createuser": "",
  380. "createuseremail": "",
  381. "modifyuser": "",
  382. "modifyemail": "",
  383. }
  384. filespider = filespider + 1
  385. //logger.Info("script", script)
  386. }
  387. }
  388. }
  389. return nil
  390. })
  391. logger.Info("节点", util.Config.Uploadevent, "脚本文件爬虫数", filespider)
  392. return scriptSpider
  393. }
  394. //脚本下架、上架、重载
  395. func UpdateSpiderByCodeState(code, state string) (bool, error) {
  396. up := false
  397. var err error
  398. if state != "5" && state != "-1" { //脚本下架
  399. SpiderHeart.Delete(code) //脚本下架,删除脚本对应心跳
  400. logger.Info("下架脚本", code)
  401. if util.Config.Working == 1 { //队列模式
  402. for i, as := range []sync.Map{Allspiders, Allspiders2} {
  403. if i == 1 && util.Config.Modal == 0 { //队列模式原始模式采集Allspiders2无用(7700下架爬虫)
  404. continue
  405. }
  406. tmp, b := as.Load(code)
  407. if b {
  408. sp, ok := tmp.(*Spider)
  409. if ok {
  410. if !sp.Stop { //脚本未执行
  411. sp.Stop = true
  412. }
  413. }
  414. as.Delete(code)
  415. logger.Info("下架脚本,Allspiders删除")
  416. }
  417. }
  418. //LoopListPath.Range(func(k, v interface{}) bool {
  419. // //if v != nil {
  420. // // info, _ := v.(map[string]string)
  421. // // if info["code"] == code {
  422. // // LoopListPath.Store(k, nil)
  423. // // lock.Lock()
  424. // // defer lock.Unlock()
  425. // // ChanDels[qu.IntAll(k)] = code
  426. // // logger.Info("下架脚本,LoopListPath更新为nil,ChanDels中位置:", k)
  427. // // }
  428. // //}
  429. // if k == code {
  430. // LoopListPath.Delete(k)
  431. // logger.Info(code, "脚本下架成功")
  432. // return false //跳出循环
  433. // }
  434. // return true
  435. //})
  436. } else { //高性能模式
  437. for _, as := range []sync.Map{Allspiders, Allspiders2} {
  438. if tmp, ok := as.Load(code); ok {
  439. sp, ok := tmp.(*Spider)
  440. if ok {
  441. sp.Stop = true
  442. sp.L.Close()
  443. as.Delete(code)
  444. }
  445. }
  446. }
  447. }
  448. LoopListPath.Delete(code)
  449. logger.Info(code, "脚本下架成功")
  450. up = true
  451. err = nil
  452. } else if state == "-1" { //爬虫重采更新线上爬虫
  453. scriptMap := getSpiderScriptDB(code)
  454. logger.Info("更新线上脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
  455. if util.Config.Working == 1 { //排队模式
  456. for _, v := range scriptMap {
  457. listsize := 0
  458. listHas := false
  459. count_ok, count_no := 0, 0
  460. LoopListPath.Range(func(key, val interface{}) bool {
  461. listsize++
  462. if tmp, ok := val.(map[string]string); ok {
  463. count_ok++
  464. if tmp["code"] == code && key == code { //队列存在,重载脚本
  465. logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
  466. listHas = true
  467. LoopListPath.Store(key, v)
  468. UpdateHighListDataByCode(code) //爬虫更新上架后,重置数据state=0
  469. logger.Info("队列模式更新列表页信息状态", code)
  470. }
  471. } else {
  472. count_no++
  473. }
  474. return true
  475. })
  476. logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
  477. if !listHas { //队列不存在
  478. logger.Info("重采更新爬虫失败:", code)
  479. up = false
  480. err = errors.New("爬虫不在线:" + code)
  481. } else {
  482. up = true
  483. err = nil
  484. logger.Info("重采更新爬虫成功", code)
  485. }
  486. }
  487. } else { //高性能模式
  488. for k, v := range scriptMap {
  489. if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
  490. sp := spd.(*Spider)
  491. sp.ScriptFile = v["script"]
  492. if v["createuser"] != "" {
  493. sp.UserName = v["createuser"]
  494. }
  495. if v["createuseremail"] != "" {
  496. sp.UserEmail = v["createuseremail"]
  497. }
  498. sp.MUserName = v["modifyuser"]
  499. sp.MUserEmail = v["modifyemail"]
  500. Allspiders.Store(k, sp)
  501. up = true
  502. err = nil
  503. logger.Info("重采更新爬虫成功", sp.Code)
  504. } else { //不存在
  505. up = false
  506. err = errors.New("爬虫不在线:" + code)
  507. logger.Info("重采更新爬虫失败:", code)
  508. }
  509. //Allspiders2
  510. if spd2, ok2 := Allspiders2.Load(k); ok2 { //对应脚本已存在,更新
  511. sp2 := spd2.(*Spider)
  512. sp2.ScriptFile = v["script"]
  513. if v["createuser"] != "" {
  514. sp2.UserName = v["createuser"]
  515. }
  516. if v["createuseremail"] != "" {
  517. sp2.UserEmail = v["createuseremail"]
  518. }
  519. sp2.MUserName = v["modifyuser"]
  520. sp2.MUserEmail = v["modifyemail"]
  521. sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) //更新上架,重载脚本
  522. Allspiders2.Store(k, sp2)
  523. // up = true
  524. // err = nil
  525. logger.Info("Allspiders2重采更新爬虫成功", sp2.Code)
  526. } else { //不存在
  527. // up = false
  528. // err = errors.New("爬虫不在线:" + code)
  529. logger.Info("Allspiders2重采更新爬虫失败:", code)
  530. }
  531. }
  532. }
  533. } else { //脚本上架
  534. scriptMap := getSpiderScriptDB(code)
  535. logger.Info("上架新增脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
  536. if util.Config.Working == 1 { //排队模式
  537. for _, v := range scriptMap {
  538. listsize := 0
  539. listHas := false
  540. count_ok, count_no := 0, 0
  541. LoopListPath.Range(func(key, val interface{}) bool {
  542. listsize++
  543. if tmp, ok := val.(map[string]string); ok { //此处判断仅仅为了得到count_ok的值,可直接判断key==code
  544. count_ok++
  545. if tmp["code"] == code && code == key { //队列存在,重载脚本
  546. logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
  547. listHas = true
  548. LoopListPath.Store(key, v)
  549. UpdateHighListDataByCode(code) //爬虫更新上架后,重置数据state=0
  550. logger.Info("队列模式更新列表页信息状态", code)
  551. }
  552. } else {
  553. count_no++
  554. }
  555. return true
  556. })
  557. logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
  558. if !listHas { //队列中不存在,新增
  559. logger.Info("上架新增脚本,队列中不存在")
  560. LoopListPath.Store(code, v) //上架
  561. // lock.Lock()
  562. // defer lock.Unlock()
  563. // if len(ChanDels) > 0 {
  564. // for i, _ := range ChanDels {
  565. // logger.Info("上架新增脚本,替补队列中位置", i)
  566. // LoopListPath.Store(i, v)
  567. // delete(ChanDels, i)
  568. // break
  569. // }
  570. // } else {
  571. // logger.Info("上架新增脚本,新增队列中位置", listsize)
  572. // LoopListPath.Store(listsize, v) //上架
  573. // }
  574. //校验是否上架成功
  575. saveList := false //记录是否上架成功
  576. listsize, count_ok, count_no = 0, 0, 0
  577. LoopListPath.Range(func(key, val interface{}) bool {
  578. listsize++
  579. if tmp, ok := val.(map[string]string); ok {
  580. count_ok++
  581. if tmp["code"] == code && key == code { //队列存在
  582. saveList = true
  583. logger.Info("上架脚本成功", code)
  584. }
  585. } else {
  586. count_no++
  587. }
  588. return true
  589. })
  590. logger.Info("上架爬虫后队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
  591. if !saveList { //上架失败
  592. logger.Info("上架脚本", code, " 失败")
  593. return false, errors.New("use " + code + " failed")
  594. }
  595. }
  596. logger.Info("上架新增脚本", code)
  597. up = true
  598. }
  599. } else { //高性能模式
  600. for k, v := range scriptMap {
  601. LoopListPath.Store(k, v)
  602. //1、Allspiders对应7000、7100、7400脚本上架下载数据(列表页爬虫集合)
  603. if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
  604. sp := spd.(*Spider)
  605. sp.ScriptFile = v["script"]
  606. if v["createuser"] != "" {
  607. sp.UserName = v["createuser"]
  608. }
  609. if v["createuseremail"] != "" {
  610. sp.UserEmail = v["createuseremail"]
  611. }
  612. sp.MUserName = v["modifyuser"]
  613. sp.MUserEmail = v["modifyemail"]
  614. //sp.LoadScript(k, sp.ScriptFile, true) //更新上架,重载脚本
  615. Allspiders.Store(k, sp)
  616. up = true
  617. err = nil
  618. logger.Info("上架重载脚本", sp.Code)
  619. } else { //新增脚本
  620. sp, errstr := NewSpider(k, v["script"])
  621. if errstr == "" && sp != nil && sp.Code != "nil" {
  622. if v["createuser"] != "" {
  623. sp.UserName = v["createuser"]
  624. }
  625. if v["createuseremail"] != "" {
  626. sp.UserEmail = v["createuseremail"]
  627. }
  628. sp.MUserName = v["modifyuser"]
  629. sp.MUserEmail = v["modifyemail"]
  630. Allspiders.Store(k, sp)
  631. sp.Stop = false
  632. sp.StartJob()
  633. up = true
  634. err = nil
  635. logger.Info("上架新增脚本", sp.Code)
  636. } else {
  637. err = errors.New("新增失败")
  638. nowT := time.Now().Unix()
  639. MgoS.Update("spider_loadfail",
  640. map[string]interface{}{
  641. "code": k,
  642. "modifytime": map[string]interface{}{
  643. "$gte": nowT - 12*3600,
  644. "$lte": nowT + 12*3600,
  645. },
  646. },
  647. map[string]interface{}{
  648. "$set": map[string]interface{}{
  649. "code": k,
  650. "type": "新增初始化脚本",
  651. "script": v["script"],
  652. "updatetime": nowT,
  653. "modifyuser": sp.MUserName,
  654. "event": util.Config.Uploadevent,
  655. "err": errstr,
  656. },
  657. }, true, false)
  658. }
  659. }
  660. //2、Allspiders2对应7100、7110、7400上架采集三级页数据(Allspiders2三级页爬虫集合)
  661. if util.Config.Modal == 1 { //高性能老模式不根据列表页数据采三级页(7000、7410)
  662. //Allspiders2
  663. if spd2, ok2 := Allspiders2.Load(k); ok2 { //对应脚本已存在,更新
  664. sp2 := spd2.(*Spider)
  665. sp2.ScriptFile = v["script"]
  666. if v["createuser"] != "" {
  667. sp2.UserName = v["createuser"]
  668. }
  669. if v["createuseremail"] != "" {
  670. sp2.UserEmail = v["createuseremail"]
  671. }
  672. sp2.MUserName = v["modifyuser"]
  673. sp2.MUserEmail = v["modifyemail"]
  674. sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) //更新上架,重载脚本
  675. Allspiders2.Store(k, sp2) //重载后放入集合
  676. UpdateHighListDataByCode(k) //爬虫更新上架后,重置数据state=0
  677. // up = true
  678. // err = nil
  679. logger.Info("Allspiders2上架重载脚本", sp2.Code)
  680. } else { //新增脚本
  681. sp2, errstr := NewSpider(k, v["script"])
  682. if errstr == "" && sp2 != nil && sp2.Code != "nil" {
  683. if v["createuser"] != "" {
  684. sp2.UserName = v["createuser"]
  685. }
  686. if v["createuseremail"] != "" {
  687. sp2.UserEmail = v["createuseremail"]
  688. }
  689. sp2.MUserName = v["modifyuser"]
  690. sp2.MUserEmail = v["modifyemail"]
  691. sp2.Stop = false
  692. sp2.IsMainThread = true //多线程采集时使用
  693. go sp2.DownloadHighDetail(true) //根据列表页数据下载三级页
  694. Allspiders2.Store(k, sp2)
  695. // up = true
  696. // err = nil
  697. logger.Info("Allspiders2上架新增脚本", sp2.Code)
  698. } /*else {
  699. err = errors.New("新增失败")
  700. mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
  701. "code": k,
  702. "type": "新增脚本失败",
  703. "script": v["script"],
  704. "intime": time.Now().Format(qu.Date_Full_Layout),
  705. "event": util.Config.Uploadevent,
  706. })
  707. }*/
  708. }
  709. }
  710. }
  711. }
  712. }
  713. logger.Info("上下架:", up, err)
  714. return up, err
  715. }
  716. //定时重载脚本文件
  717. func ReloadSpiderFile() {
  718. scriptMap := getSpiderScriptFile(true)
  719. for k, v := range scriptMap {
  720. for i, as := range []sync.Map{Allspiders, Allspiders2} {
  721. if i == 1 && util.Config.Modal == 0 { //队列模式原始模式采集Allspiders2无用
  722. continue
  723. }
  724. if spd, ok := as.Load(k); ok { //对应脚本已存在,更新
  725. sp := spd.(*Spider)
  726. logger.Info("定时重载脚本", sp.Code)
  727. sp.ScriptFile = v["script"]
  728. if v["createuser"] != "" {
  729. sp.UserName = v["createuser"]
  730. }
  731. if v["createuseremail"] != "" {
  732. sp.UserEmail = v["createuseremail"]
  733. }
  734. sp.MUserName = v["modifyuser"]
  735. sp.MUserEmail = v["modifyemail"]
  736. as.Store(k, sp)
  737. } else { //新增脚本
  738. var sp *Spider
  739. var errstr string
  740. if util.Config.Working == 1 { //排队模式
  741. if i == 0 {
  742. //length := 0
  743. //LoopListPath.Range(func(k, v interface{}) bool {
  744. // length++
  745. // return true
  746. //})
  747. LoopListPath.Store(k, v) //排队模式Allspiders,Allspiders2共用一个LoopListPath,新增一次即可
  748. sp, errstr = NewSpider_New(k, v["script"], false)
  749. } else {
  750. sp, errstr = NewSpider_New(k, v["script"], true)
  751. }
  752. } else {
  753. sp, errstr = NewSpider(k, v["script"])
  754. }
  755. if errstr == "" && sp != nil && sp.Code != "nil" {
  756. if v["createuser"] != "" {
  757. sp.UserName = v["createuser"]
  758. }
  759. if v["createuseremail"] != "" {
  760. sp.UserEmail = v["createuseremail"]
  761. }
  762. sp.MUserName = v["modifyuser"]
  763. sp.MUserEmail = v["modifyemail"]
  764. as.Store(k, sp)
  765. if util.Config.Working == 1 {
  766. sp.Stop = true
  767. // if i == 0 {
  768. // length := 0
  769. // LoopListPath.Range(func(k, v interface{}) bool {
  770. // length++
  771. // return true
  772. // })
  773. // LoopListPath.Store(length, v)
  774. // }
  775. } else {
  776. sp.Stop = false
  777. if i == 0 { //高性能模式只有Allspiders启动爬虫,Allspiders2只负责下三级页
  778. sp.StartJob()
  779. }
  780. }
  781. logger.Info("定时重载脚本--新增", sp.Code)
  782. } else {
  783. if i == 0 {
  784. nowT := time.Now().Unix()
  785. MgoS.Update("spider_loadfail",
  786. map[string]interface{}{
  787. "code": k,
  788. "modifytime": map[string]interface{}{
  789. "$gte": nowT - 12*3600,
  790. "$lte": nowT + 12*3600,
  791. },
  792. },
  793. map[string]interface{}{
  794. "$set": map[string]interface{}{
  795. "code": k,
  796. "type": "定时重载--新增失败",
  797. "script": v["script"],
  798. "updatetime": nowT,
  799. "modifyuser": sp.MUserName,
  800. "event": util.Config.Uploadevent,
  801. "err": errstr,
  802. },
  803. }, true, false)
  804. }
  805. }
  806. }
  807. }
  808. // if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
  809. // sp := spd.(*Spider)
  810. // logger.Info("定时重载脚本", sp.Code)
  811. // sp.ScriptFile = v["script"]
  812. // if v["createuser"] != "" {
  813. // sp.UserName = v["createuser"]
  814. // }
  815. // if v["createuseremail"] != "" {
  816. // sp.UserEmail = v["createuseremail"]
  817. // }
  818. // sp.MUserName = v["modifyuser"]
  819. // sp.MUserEmail = v["modifyemail"]
  820. // Allspiders.Store(k, sp)
  821. // } else { //新增脚本
  822. // var sp *Spider
  823. // if util.Config.Working == 1 { //排队模式
  824. // length := 0
  825. // LoopListPath.Range(func(k, v interface{}) bool {
  826. // length++
  827. // return true
  828. // })
  829. // LoopListPath.Store(length, v)
  830. // sp = NewSpider_New(k, v["script"], false)
  831. // } else {
  832. // sp = NewSpider(k, v["script"])
  833. // }
  834. // if sp != nil && sp.Code != "nil" {
  835. // if v["createuser"] != "" {
  836. // sp.UserName = v["createuser"]
  837. // }
  838. // if v["createuseremail"] != "" {
  839. // sp.UserEmail = v["createuseremail"]
  840. // }
  841. // sp.MUserName = v["modifyuser"]
  842. // sp.MUserEmail = v["modifyemail"]
  843. // Allspiders.Store(k, sp)
  844. // if util.Config.Working == 1 {
  845. // sp.Stop = true
  846. // length := 0
  847. // LoopListPath.Range(func(k, v interface{}) bool {
  848. // length++
  849. // return true
  850. // })
  851. // LoopListPath.Store(length, v)
  852. // } else {
  853. // sp.Stop = false
  854. // sp.StartJob()
  855. // }
  856. // logger.Info("定时重载脚本--新增", sp.Code)
  857. // } else {
  858. // mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
  859. // "code": k,
  860. // "type": "定时重载--新增失败",
  861. // "script": v["script"],
  862. // "intime": time.Now().Format(qu.Date_Full_Layout),
  863. // "event": util.Config.Uploadevent,
  864. // })
  865. // }
  866. // }
  867. }
  868. util.TimeAfterFunc(time.Duration(15)*time.Minute, ReloadSpiderFile, TimeChan)
  869. }
  870. //排队模式生成爬虫
  871. func NewSpider_New(code, luafile string, newstate bool) (*Spider, string) {
  872. defer mu.Catch()
  873. spider := &Spider{}
  874. err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate, false)
  875. if err != "" {
  876. return nil, err
  877. }
  878. spider.Code = spider.GetVar("spiderCode")
  879. spider.Script.SCode = spider.Code
  880. spider.Name = spider.GetVar("spiderName")
  881. spider.Channel = spider.GetVar("spiderChannel")
  882. //spider.LastExecTime = GetLastExectime(spider.Code)
  883. spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
  884. spider.Collection = spider.GetVar("spider2Collection")
  885. spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
  886. spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
  887. spider.StoreMode = spider.GetIntVar("spiderStoreMode")
  888. spider.CoverAttr = spider.GetVar("spiderCoverAttr")
  889. spiderSleepBase := spider.GetIntVar("spiderSleepBase")
  890. if spiderSleepBase == -1 {
  891. spider.SleepBase = 1000
  892. } else {
  893. spider.SleepBase = spiderSleepBase
  894. }
  895. spiderSleepRand := spider.GetIntVar("spiderSleepRand")
  896. if spiderSleepRand == -1 {
  897. spider.SleepRand = 1000
  898. } else {
  899. spider.SleepRand = spiderSleepRand
  900. }
  901. spiderTimeout := spider.GetIntVar("spiderTimeout")
  902. if spiderTimeout == -1 {
  903. spider.Timeout = 60
  904. } else {
  905. spider.Timeout = int64(spiderTimeout)
  906. }
  907. spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
  908. if v, ok := Allspiders.Load(spider.Code); ok {
  909. sp := v.(*Spider)
  910. spider.TodayDowncount = sp.TodayDowncount
  911. spider.ToDayRequestNum = sp.ToDayRequestNum
  912. spider.YesterdayDowncount = sp.YesterdayDowncount
  913. spider.YestoDayRequestNum = sp.YestoDayRequestNum
  914. spider.TotalDowncount = sp.TotalDowncount
  915. spider.TotalRequestNum = sp.TotalRequestNum
  916. spider.ErrorNum = sp.ErrorNum
  917. spider.RoundCount = sp.RoundCount
  918. }
  919. spider.UserName = spider.GetVar("spiderUserName")
  920. spider.UserEmail = spider.GetVar("spiderUserEmail")
  921. spider.UploadTime = spider.GetVar("spiderUploadTime")
  922. //新增历史补漏
  923. spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
  924. spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
  925. //新老爬虫
  926. spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
  927. return spider, ""
  928. }
  929. //高性能模式生成爬虫
  930. func NewSpider(code, luafile string) (*Spider, string) {
  931. defer mu.Catch()
  932. spider := &Spider{}
  933. err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, false)
  934. if err != "" {
  935. return nil, err
  936. }
  937. spider.Code = spider.GetVar("spiderCode")
  938. spider.SCode = spider.Code
  939. spider.Name = spider.GetVar("spiderName")
  940. spider.Channel = spider.GetVar("spiderChannel")
  941. //spider.LastExecTime = GetLastExectime(spider.Code)
  942. spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
  943. spider.Collection = spider.GetVar("spider2Collection")
  944. spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
  945. //spider.Thread = int64(spider.GetIntVar("spiderThread"))
  946. spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
  947. spider.StoreMode = spider.GetIntVar("spiderStoreMode")
  948. spider.CoverAttr = spider.GetVar("spiderCoverAttr")
  949. spiderSleepBase := spider.GetIntVar("spiderSleepBase")
  950. if spiderSleepBase == -1 {
  951. spider.SleepBase = 1000
  952. } else {
  953. spider.SleepBase = spiderSleepBase
  954. }
  955. spiderSleepRand := spider.GetIntVar("spiderSleepRand")
  956. if spiderSleepRand == -1 {
  957. spider.SleepRand = 1000
  958. } else {
  959. spider.SleepRand = spiderSleepRand
  960. }
  961. spiderTimeout := spider.GetIntVar("spiderTimeout")
  962. if spiderTimeout == -1 {
  963. spider.Timeout = 60
  964. } else {
  965. spider.Timeout = int64(spiderTimeout)
  966. }
  967. spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
  968. date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
  969. tmp := GetDownloadLast(spider.Code, date) //
  970. if len(tmp) > 0 {
  971. spider.TodayDowncount = int32(qu.IntAll(tmp["todaydowncount"]))
  972. spider.ToDayRequestNum = int32(qu.IntAll(tmp["todaydownreq"]))
  973. spider.YesterdayDowncount = int32(qu.IntAll(tmp["yesdowncount"]))
  974. spider.YestoDayRequestNum = int32(qu.IntAll(tmp["yesdownreq"]))
  975. spider.TotalDowncount = spider.TodayDowncount + int32(qu.IntAll(tmp["totaldown"]))
  976. spider.TotalRequestNum = spider.ToDayRequestNum + int32(qu.IntAll(tmp["totalreq"]))
  977. }
  978. spider.UserName = spider.GetVar("spiderUserName")
  979. spider.UserEmail = spider.GetVar("spiderUserEmail")
  980. spider.UploadTime = spider.GetVar("spiderUploadTime")
  981. //新增历史补漏
  982. //qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
  983. spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
  984. spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
  985. //新老爬虫
  986. spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
  987. return spider, ""
  988. }
  989. //多线程生成爬虫
  990. func NewSpiderForThread(code, luafile string) (*Spider, string) {
  991. defer mu.Catch()
  992. spider := &Spider{}
  993. err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, true)
  994. if err != "" {
  995. return nil, err
  996. }
  997. spider.Code = spider.GetVar("spiderCode")
  998. spider.SCode = spider.Code
  999. spider.Script.SCode = spider.Code
  1000. spider.Name = spider.GetVar("spiderName")
  1001. spider.Channel = spider.GetVar("spiderChannel")
  1002. //spider.LastExecTime = GetLastExectime(spider.Code)
  1003. spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
  1004. spider.Collection = spider.GetVar("spider2Collection")
  1005. spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
  1006. //spider.Thread = int64(spider.GetIntVar("spiderThread"))
  1007. spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
  1008. spider.StoreMode = spider.GetIntVar("spiderStoreMode")
  1009. spider.CoverAttr = spider.GetVar("spiderCoverAttr")
  1010. spiderSleepBase := spider.GetIntVar("spiderSleepBase")
  1011. if spiderSleepBase == -1 {
  1012. spider.SleepBase = 1000
  1013. } else {
  1014. spider.SleepBase = spiderSleepBase
  1015. }
  1016. spiderSleepRand := spider.GetIntVar("spiderSleepRand")
  1017. if spiderSleepRand == -1 {
  1018. spider.SleepRand = 1000
  1019. } else {
  1020. spider.SleepRand = spiderSleepRand
  1021. }
  1022. spiderTimeout := spider.GetIntVar("spiderTimeout")
  1023. if spiderTimeout == -1 {
  1024. spider.Timeout = 60
  1025. } else {
  1026. spider.Timeout = int64(spiderTimeout)
  1027. }
  1028. spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
  1029. spider.UserName = spider.GetVar("spiderUserName")
  1030. spider.UserEmail = spider.GetVar("spiderUserEmail")
  1031. spider.UploadTime = spider.GetVar("spiderUploadTime")
  1032. //新增历史补漏
  1033. //qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
  1034. spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
  1035. spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
  1036. //新老爬虫
  1037. spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
  1038. return spider, ""
  1039. }
  1040. //下载量入库
  1041. func SaveDownCount(code string, addtotal bool, todayDowncount, todayRequestNum, yesterdayDowncount, yestoDayRequestNum int32) {
  1042. date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
  1043. updata := map[string]interface{}{}
  1044. if addtotal {
  1045. updata = map[string]interface{}{
  1046. "$inc": map[string]interface{}{"totaldown": todayDowncount, "totalreq": todayRequestNum},
  1047. "$set": map[string]interface{}{
  1048. "yesdowncount": yesterdayDowncount,
  1049. "yesdownreq": yestoDayRequestNum,
  1050. "todaydowncount": todayDowncount,
  1051. "todaydownreq": todayRequestNum,
  1052. "date": date,
  1053. "year": time.Now().Year(),
  1054. "month": time.Now().Month(),
  1055. "day": time.Now().Day(),
  1056. },
  1057. }
  1058. } else {
  1059. updata = map[string]interface{}{
  1060. "$set": map[string]interface{}{
  1061. "yesdowncount": yesterdayDowncount,
  1062. "yesdownreq": yestoDayRequestNum,
  1063. "todaydowncount": todayDowncount,
  1064. "todaydownreq": todayRequestNum,
  1065. "date": date,
  1066. "year": time.Now().Year(),
  1067. "month": time.Now().Month(),
  1068. "day": time.Now().Day(),
  1069. },
  1070. }
  1071. }
  1072. MgoS.Update("spider_downlog", map[string]interface{}{"code": code, "date": date}, updata, true, false)
  1073. }
  1074. //获取下载的上下限(没用)
  1075. func GetLimitDownload(code string) (uplimit, lowlimit int) {
  1076. defer mu.Catch()
  1077. ret, _ := MgoS.FindOne("spider_ldtime", map[string]interface{}{"code": code})
  1078. if ret != nil && len(*ret) > 0 {
  1079. uplimit = qu.IntAll((*ret)["uplimit"])
  1080. lowlimit = qu.IntAll((*ret)["lowlimit"])
  1081. return uplimit, lowlimit
  1082. } else {
  1083. return 100, 0
  1084. }
  1085. }
  1086. //拼装脚本
  1087. func GetScriptByTmp(luaconfig map[string]interface{}) string {
  1088. defer mu.Catch()
  1089. script := ""
  1090. if luaconfig["listcheck"] == nil {
  1091. luaconfig["listcheck"] = ""
  1092. }
  1093. if luaconfig["contentcheck"] == nil {
  1094. luaconfig["contentcheck"] = ""
  1095. }
  1096. if luaconfig != nil && len(luaconfig) > 0 {
  1097. common := luaconfig["param_common"].([]interface{})
  1098. //新增spiderIsHistoricalMend spiderIsMustDownload
  1099. if len(common) == 15 {
  1100. common = append(common, "", "", "")
  1101. } else {
  1102. common = append(common, false, false, "", "", "")
  1103. }
  1104. for k, v := range common {
  1105. if k == 4 || k == 5 || k == 6 || k == 9 || k == 10 {
  1106. common[k] = qu.IntAll(v)
  1107. }
  1108. }
  1109. script, _ = GetTmpModel(map[string][]interface{}{"common": common})
  1110. //发布时间
  1111. script_time := ""
  1112. if luaconfig["type_time"] == 0 { //向导模式
  1113. time := luaconfig["param_time"].([]interface{})
  1114. script_time, _ = GetTmpModel(map[string][]interface{}{
  1115. "time": time,
  1116. })
  1117. } else { //专家模式
  1118. script_time = luaconfig["str_time"].(string)
  1119. }
  1120. //列表页
  1121. script_list := ""
  1122. if luaconfig["type_list"] == 0 { //向导模式
  1123. list := luaconfig["param_list"].([]interface{})
  1124. addrs := strings.Split(list[1].(string), "\n")
  1125. if len(addrs) > 0 {
  1126. for k, v := range addrs {
  1127. addrs[k] = "'" + v + "'"
  1128. }
  1129. list[1] = strings.Join(addrs, ",")
  1130. } else {
  1131. list[1] = ""
  1132. }
  1133. script_list, _ = GetTmpModel(map[string][]interface{}{
  1134. "list": list,
  1135. "listcheck": []interface{}{luaconfig["listcheck"]},
  1136. })
  1137. } else { //专家模式
  1138. script_list = luaconfig["str_list"].(string)
  1139. }
  1140. //三级页
  1141. script_content := ""
  1142. if luaconfig["type_content"] == 0 { //向导模式
  1143. content := luaconfig["param_content"].([]interface{})
  1144. script_content, _ = GetTmpModel(map[string][]interface{}{
  1145. "content": content,
  1146. "contentcheck": []interface{}{luaconfig["contentcheck"]},
  1147. })
  1148. } else { //专家模式
  1149. script_content = luaconfig["str_content"].(string)
  1150. }
  1151. script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"], luaconfig["spidercompete"])
  1152. script += `
  1153. ` + script_time + `
  1154. ` + script_list + `
  1155. ` + script_content
  1156. script = ReplaceModel(script, common, luaconfig["model"].(map[string]interface{}))
  1157. }
  1158. return script
  1159. }
  1160. //生成爬虫脚本
  1161. func GetTmpModel(param map[string][]interface{}) (script string, err interface{}) {
  1162. qu.Try(func() {
  1163. //param_common拼接
  1164. if param != nil && param["common"] != nil {
  1165. if len(param["common"]) < 12 {
  1166. err = "公共参数配置不全"
  1167. } else {
  1168. script = fmt.Sprintf(util.Tmp_common, param["common"]...)
  1169. }
  1170. }
  1171. //发布时间拼接
  1172. if param != nil && param["time"] != nil {
  1173. if len(param["time"]) < 3 {
  1174. err = "方法:time-参数配置不全"
  1175. } else {
  1176. script += fmt.Sprintf(util.Tmp_pubtime, param["time"]...)
  1177. }
  1178. }
  1179. //列表页拼接
  1180. if param != nil && param["list"] != nil {
  1181. if len(param["list"]) < 7 {
  1182. err = "方法:list-参数配置不全"
  1183. } else {
  1184. list := []interface{}{param["listcheck"][0]}
  1185. list = append(list, param["list"]...)
  1186. script += fmt.Sprintf(util.Tmp_pagelist, list...)
  1187. script = strings.Replace(script, "#pageno#", `"..tostring(pageno).."`, -1)
  1188. }
  1189. }
  1190. //详情页拼接
  1191. if param != nil && param["content"] != nil {
  1192. if len(param["content"]) < 2 {
  1193. err = "方法:content-参数配置不全"
  1194. } else {
  1195. content := []interface{}{param["contentcheck"][0]}
  1196. content = append(content, param["content"]...)
  1197. script += fmt.Sprintf(util.Tmp_content, content...)
  1198. }
  1199. }
  1200. }, func(e interface{}) {
  1201. err = e
  1202. })
  1203. return script, err
  1204. }
  1205. //补充模型
  1206. func ReplaceModel(script string, comm []interface{}, model map[string]interface{}) string {
  1207. defer mu.Catch()
  1208. //补充通用信息
  1209. commstr := `item["spidercode"]="` + comm[0].(string) + `";`
  1210. commstr += `item["site"]="` + comm[1].(string) + `";`
  1211. commstr += `item["channel"]="` + comm[2].(string) + `";`
  1212. script = strings.Replace(script, "--Common--", commstr, -1)
  1213. //补充模型信息
  1214. modelstr := ""
  1215. for k, v := range model {
  1216. modelstr += `item["` + k + `"]="` + v.(string) + `";`
  1217. }
  1218. script = strings.Replace(script, "--Model--", modelstr, -1)
  1219. return script
  1220. }
  1221. //爬虫信息提交编辑器(心跳)
  1222. func SpiderInfoSend() {
  1223. time.Sleep(15 * time.Second)
  1224. list := []interface{}{}
  1225. Allspiders.Range(func(key, value interface{}) bool {
  1226. v := value.(*Spider)
  1227. info := map[string]interface{}{}
  1228. info["code"] = v.Code
  1229. info["todayDowncount"] = v.TodayDowncount
  1230. info["toDayRequestNum"] = v.ToDayRequestNum
  1231. info["yesterdayDowncount"] = v.YesterdayDowncount
  1232. info["yestoDayRequestNum"] = v.YestoDayRequestNum
  1233. info["totalDowncount"] = v.TotalDowncount
  1234. info["totalRequestNum"] = v.TotalRequestNum
  1235. info["errorNum"] = v.ErrorNum
  1236. info["roundCount"] = v.RoundCount
  1237. info["runRate"] = v.SpiderRunRate
  1238. info["lastHeartbeat"] = v.LastHeartbeat
  1239. info["lastDowncount"] = v.LastDowncount
  1240. info["lstate"] = v.L.Status(v.L)
  1241. list = append(list, info)
  1242. return true
  1243. })
  1244. bs, _ := json.Marshal(list)
  1245. value := url.Values{
  1246. "data": []string{util.Se.EncodeString(string(bs))},
  1247. "type": []string{"info"},
  1248. }
  1249. _, err := http.PostForm(util.Config.Editoraddr, value)
  1250. if err != nil {
  1251. logger.Error("send to editor: ", err.Error())
  1252. }
  1253. util.TimeAfterFunc(5*time.Minute, SpiderInfoSend, TimeChan)
  1254. }
  1255. //保存心跳信息
  1256. func SaveHeartInfo() {
  1257. time.Sleep(30 * time.Second)
  1258. num := 0
  1259. SpiderHeart.Range(func(key, value interface{}) bool {
  1260. code := key.(string)
  1261. heart, ok := value.(*Heart)
  1262. if ok {
  1263. num++
  1264. update := []map[string]interface{}{}
  1265. update = append(update, map[string]interface{}{"code": code})
  1266. update = append(update, map[string]interface{}{"$set": map[string]interface{}{
  1267. "site": heart.Site,
  1268. "channel": heart.Channel,
  1269. "list": heart.ListHeart,
  1270. "findlist": heart.FindListHeart,
  1271. "detail": heart.DetailHeart,
  1272. "detailexecute": heart.DetailExecuteHeart,
  1273. "modifyuser": heart.ModifyUser,
  1274. "event": util.Config.Uploadevent,
  1275. "updatetime": time.Now().Unix(),
  1276. "del": false,
  1277. }})
  1278. UpdataHeartCache <- update
  1279. }
  1280. return true
  1281. })
  1282. logger.Info("更新心跳个数:", num)
  1283. time.AfterFunc(20*time.Minute, SaveHeartInfo)
  1284. }
  1285. //信息提交编辑器
  1286. func SpiderCodeSendToEditor(code string) {
  1287. defer qu.Catch()
  1288. ok := false
  1289. for i := 1; i <= 3; i++ {
  1290. logger.Info("Code:", code, " times:", i, " Send Move Event")
  1291. list := []interface{}{}
  1292. list = append(list, code)
  1293. bs, _ := json.Marshal(list)
  1294. value := url.Values{
  1295. "data": []string{util.Se.EncodeString(string(bs))},
  1296. "type": []string{"code"},
  1297. }
  1298. res, err := http.PostForm(util.Config.Editoraddr, value)
  1299. if err != nil {
  1300. logger.Error("Send To Editor For Move Event Failed,Code:", code)
  1301. } else {
  1302. if res != nil {
  1303. res.Body.Close()
  1304. }
  1305. ok = true
  1306. break
  1307. }
  1308. }
  1309. logger.Info("Code:", code, " Send Move Event:", ok)
  1310. MgoE.Save("luamovelog", map[string]interface{}{
  1311. "code": code,
  1312. "comeintime": time.Now().Unix(),
  1313. "type": "sendfail",
  1314. "ok": ok,
  1315. })
  1316. }