handler.go 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251
  1. package spider
  2. import (
  3. "bufio"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "github.com/cjoudrey/gluahttp"
  8. lujson "github.com/yuin/gopher-json"
  9. "net/http"
  10. "net/url"
  11. "os"
  12. "path/filepath"
  13. qu "qfw/util"
  14. "regexp"
  15. util "spiderutil"
  16. "strings"
  17. "sync"
  18. "time"
  19. "github.com/donnie4w/go-logger/logger"
  20. "github.com/yuin/gopher-lua"
  21. )
var SpiderHeart sync.Map = sync.Map{}  //per-spider heartbeat records
var Allspiders sync.Map = sync.Map{}   //spiders currently crawling list pages
var Allspiders2 sync.Map = sync.Map{}  //spiders currently crawling detail (third-level) pages
var LoopListPath sync.Map = sync.Map{} //all loaded spider scripts, keyed by spider code
//var ChanDels = map[int]string{}
//var lock sync.Mutex
var CC chan *lua.LState  //lua-state pool for list-page crawling (queue mode)
var CC2 chan *lua.LState //lua-state pool for detail-page crawling (queue mode)
var Chansize int
var regcode, _ = regexp.Compile(`="(.*)"`) //extracts the quoted value from a line like spiderCode="..."
var InitCount int                          //number of scripts loaded during initialization
var InitAllLuaOver = make(chan bool, 1) //signalled once every script has been loaded
  34. func InitSpider() {
  35. scriptMap := getSpiderScriptDB("all") //加载爬虫,初始化模板
  36. scriptMapFile := getSpiderScriptFile(false)
  37. for code, v := range scriptMap {
  38. LoopListPath.Store(code, v)
  39. InitCount++
  40. }
  41. for code, v := range scriptMapFile {
  42. LoopListPath.Store(code, v)
  43. InitCount++
  44. }
  45. if util.Config.Working == 0 {
  46. NoQueueScript() //高性能模式
  47. } else {
  48. if util.Config.Modal == 0 { //原始模式
  49. QueueUpScriptList()
  50. } else { //列表页和三级页分开采集
  51. go QueueUpScriptList() //节能模式列表页
  52. go QueueUpScriptDetail() //节能模式三级页
  53. }
  54. }
  55. }
// NoQueueScript runs the high-performance mode: every script in LoopListPath
// gets its own resident Spider that is started immediately (no shared lua-state
// queue). Load failures are upserted into spider_loadfail. When all spiders
// have been processed a signal is sent on InitAllLuaOver.
func NoQueueScript() {
	// Per-spider download limits (uplimit/lowlimit) stored in spider_ldtime.
	list, _ := MgoS.Find("spider_ldtime", nil, nil, map[string]interface{}{"code": 1, "uplimit": 1, "lowlimit": 1}, false, -1, -1)
	LoopListPath.Range(func(key, temp interface{}) bool {
		if info, ok := temp.(map[string]string); ok {
			code := info["code"]
			script := info["script"]
			sp, errstr := CreateSpider(code, script, true, false)
			if errstr == "" && sp != nil && sp.Code != "nil" { // script loaded successfully
				//sp.Index = qu.IntAll(key)
				//sp2.Index = qu.IntAll(key)
				Allspiders.Store(sp.Code, sp)
				// NOTE(review): *list panics if Find returned nil — confirm
				// MgoS.Find never returns a nil slice on error.
				for _, tmp := range *list {
					if qu.ObjToString(tmp["code"]) == sp.Code {
						sp.UpperLimit = qu.IntAll(tmp["uplimit"])
						sp.LowerLimit = qu.IntAll(tmp["lowlimit"])
						break
					}
				}
				if !Supplement && util.Config.Modal == 1 && !util.Config.IsHistoryEvent { // separate list/detail crawling mode
					// NOTE(review): sp2 is used without a nil check; CreateSpider
					// can return nil on a load error — confirm this cannot happen
					// when the first CreateSpider for the same script succeeded.
					sp2, _ := CreateSpider(code, script, true, false)
					sp2.IsMainThread = true // used by multi-threaded detail crawling
					Allspiders2.Store(sp.Code, sp2)
				}
				sp.StartJob()
				//util.TimeSleepFunc(10*time.Millisecond, TimeSleepChan)
			} else {
				// Load failed: upsert a failure record keyed by code within a ±12h window.
				logger.Info(code, "脚本加载失败,请检查!")
				nowT := time.Now().Unix()
				username := "异常"
				if sp != nil {
					username = sp.MUserName
				}
				MgoS.Update("spider_loadfail",
					map[string]interface{}{
						"code": code,
						"modifytime": map[string]interface{}{
							"$gte": nowT - 12*3600,
							"$lte": nowT + 12*3600,
						},
					},
					map[string]interface{}{
						"$set": map[string]interface{}{
							"code":       code,
							"type":       "初始化",
							"script":     script,
							"updatetime": nowT,
							"modifyuser": username,
							"event":      util.Config.Uploadevent,
							"err":        errstr,
						},
					}, true, false)
			}
			time.Sleep(100 * time.Millisecond) // stagger spider start-up
		}
		return true
	})
	InitAllLuaOver <- true // all spiders initialized
	logger.Info("高性能模式:LUA加载完成")
	numSpider := 0
	Allspiders.Range(func(key, value interface{}) bool {
		numSpider++
		return true
	})
	logger.Info("总共加载脚本数:", numSpider)
}
// QueueUpScriptList runs the queue ("energy-saving") mode for list pages: it
// pre-fills a pool of lua states (CC) and then loops forever over
// LoopListPath, starting a fresh Spider round for every script whose previous
// round has finished. While a main task is still running, bounded parallel
// sub-tasks may be spawned up to ListParallelTaskLimit. History-mend spiders
// run once and are then removed from the queue.
func QueueUpScriptList() {
	logger.Info("节能模式列表页")
	CC = make(chan *lua.LState, util.Config.Chansize)
	for i := 0; i < util.Config.Chansize; i++ { // fill the shared lua-state pool (Allspiders/Allspiders2 each have a pool)
		CC <- lua.NewState(lua.Options{
			RegistrySize:        256 * 20,
			CallStackSize:       256,
			IncludeGoStackTrace: false,
		})
	}
	for {
		listLen, listNoLen, DelLen := 0, 0, 0
		logger.Info(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环", "初始化脚本数量:", InitCount)
		LoopListPath.Range(func(key, temp interface{}) bool {
			if info, ok := temp.(map[string]string); ok {
				script := info["script"]
				code := info["code"]
				// Did the previous round for this code finish?
				old_is_running := false
				sp_ok := false
				sp_old := &Spider{}
				tmp, b := Allspiders.Load(code)
				if b {
					// NOTE(review): a failed assertion would set sp_old to nil and
					// the dereference below would panic — only safe because the map
					// is assumed to hold *Spider values exclusively; confirm.
					if sp_old, sp_ok = tmp.(*Spider); sp_ok {
						if !sp_old.Stop { // main task still running
							old_is_running = true
						}
					}
				}
				logger.Info("Code:", code, "Is Downloading List:", old_is_running, ",subtask num:", sp_old.ListParallelTaskNum)
				if !old_is_running { // previous round finished: start a fresh round
					sp, errstr := CreateSpider(code, script, false, false)
					//logger.Info("初始化脚本是否成功:", sp != nil, e.Value)
					if errstr == "" && sp != nil && sp.Code != "nil" { // script loaded successfully
						//sp.Index = qu.IntAll(key)
						sp.ListParallelTaskNum = sp_old.ListParallelTaskNum // carry over the sub-task count
						Allspiders.Store(code, sp)
						sp.StartJob()
					} else {
						// Load failed: upsert a failure record keyed by code within a ±12h window.
						nowT := time.Now().Unix()
						username := "异常"
						if sp != nil {
							username = sp.MUserName
						}
						MgoS.Update("spider_loadfail",
							map[string]interface{}{
								"code": code,
								"modifytime": map[string]interface{}{
									"$gte": nowT - 12*3600,
									"$lte": nowT + 12*3600,
								},
							},
							map[string]interface{}{
								"$set": map[string]interface{}{
									"code":       code,
									"type":       "初始化",
									"script":     script,
									"updatetime": nowT,
									"modifyuser": username,
									"event":      util.Config.Uploadevent,
									"err":        errstr,
								},
							}, true, false)
					}
					if sp != nil && sp.IsHistoricalMend { // history-mend spiders run once, then are removed
						DelLen++
						LoopListPath.Delete(key)
						b = MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"state": 6}}, false, false)
						logger.Debug("Delete History Code:", code, b)
					}
				} else if sp_ok && sp_old.ListParallelTaskNum < util.Config.PageTurnInfo.ListParallelTaskLimit { // main task busy: spawn a bounded sub-task
					spTmp, errstr := CreateSpider(code, script, true, true)
					if errstr == "" && spTmp != nil && spTmp.Code != "nil" { // script loaded successfully
						// NOTE(review): ListParallelTaskNum is incremented here and
						// decremented in the goroutine below without synchronization —
						// a data race under -race; confirm whether this is tolerated.
						sp_old.ListParallelTaskNum++
						logger.Info(code, "子任务开始执行,当前子任务数", sp_old.ListParallelTaskNum)
						// Start the sub-task download.
						go func(spt, spo *Spider) {
							defer func() {
								spt.L.Close()             // release the lua state
								spo.ListParallelTaskNum-- // sub-task finished
							}()
							err := spt.DownListPageItem() // download list pages
							if err != nil {
								logger.Error(spt.Code, err)
							}
						}(spTmp, sp_old)
					}
				}
				listLen++
			} else {
				logger.Info("Code:", key, "Is Not Download List")
				listNoLen++
			}
			time.Sleep(100 * time.Millisecond)
			return true
		})
		time.Sleep(1 * time.Second)
		// Recount online/offline queue entries for the next pass.
		count_ok, count_no := 0, 0
		LoopListPath.Range(func(k, v interface{}) bool {
			if v != nil {
				count_ok++
			} else {
				count_no++
			}
			return true
		})
		InitCount = count_ok
		logger.Info(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环,列表长度,", listLen, listNoLen, "删除数量", DelLen, "执行完毕后数量统计:", count_ok, count_no)
	}
}
// QueueUpScriptDetail runs the queue ("energy-saving") mode for detail
// (third-level) pages: it pre-fills a pool of lua states (CC2) and loops
// forever over LoopListPath, (re)creating a Spider in Allspiders2 for every
// script whose previous detail round has finished and kicking off
// DownloadListDetail in a goroutine.
func QueueUpScriptDetail() {
	logger.Info("节能模式三级页")
	chanSize := util.Config.DetailChansize
	CC2 = make(chan *lua.LState, chanSize)
	for i := 0; i < chanSize; i++ { // fill the detail-page lua-state pool
		CC2 <- lua.NewState(lua.Options{
			RegistrySize:        256 * 20,
			CallStackSize:       256,
			IncludeGoStackTrace: false,
		})
	}
	for {
		count_ok, count_no := 0, 0
		logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环", "初始化脚本数量:", InitCount)
		LoopListPath.Range(func(key, temp interface{}) bool {
			if info, ok := temp.(map[string]string); ok {
				count_ok++
				code := info["code"]
				// Did the previous detail round for this code finish?
				old_is_running := false
				tmp, b := Allspiders2.Load(code)
				if b {
					if sp_old, ok := tmp.(*Spider); ok {
						if !sp_old.Stop {
							old_is_running = true
						}
					}
				}
				logger.Info("Code:", code, "Is Downloading Detail:", old_is_running)
				if !old_is_running { // previous round finished: start a new one
					script := info["script"]
					sp, errstr := CreateSpider(code, script, true, false)
					if errstr == "" && sp != nil && sp.Code != "nil" { // script loaded successfully
						//sp.Index = qu.IntAll(key)
						sp.IsMainThread = true
						Allspiders2.Store(code, sp)
						go sp.DownloadListDetail(false) // download detail-page data
					}
				}
			} else {
				logger.Info("Code:", key, "Is Not Download Detail")
				count_no++
			}
			time.Sleep(100 * time.Millisecond)
			return true
		})
		InitCount = count_ok
		time.Sleep(1 * time.Second)
		logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环完毕,数量统计:", count_ok, count_no)
	}
}
  284. // 获取所有爬虫脚本--数据库
  285. func getSpiderScriptDB(code string) map[string]map[string]string {
  286. scriptSpider := map[string]map[string]string{}
  287. query := map[string]interface{}{}
  288. if Supplement { //数据采集
  289. query = map[string]interface{}{
  290. "state": 5,
  291. "platform": "golua平台",
  292. "event": map[string]interface{}{
  293. "$ne": 7000,
  294. },
  295. "spiderimportant": true,
  296. }
  297. } else if code == "all" { //初始化所有脚本
  298. query = map[string]interface{}{"state": 5, "event": util.Config.Uploadevent}
  299. } else { //消息在线上传
  300. query = map[string]interface{}{"code": code, "event": util.Config.Uploadevent}
  301. //query = `{"$or":[{"iupload":1},{"iupload":3}],"event":` + fmt.Sprint(util.Config.Uploadevent) + `,"modifytime":{"$gt":1502937042}}`
  302. }
  303. listdb, _ := MgoEB.Find("luaconfig", query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
  304. //临时历史附件
  305. //listdb, _ := MgoEB.Find("luaconfig_test", query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
  306. for _, v := range *listdb {
  307. if Supplement && strings.Contains(qu.ObjToString(v["code"]), "_bu") { //补采去除含“_bu”后缀的爬虫
  308. continue
  309. }
  310. old := qu.IntAll(v["old_lua"])
  311. script := ""
  312. if old == 1 {
  313. script = fmt.Sprint(v["luacontent"])
  314. } else {
  315. if v["oldlua"] != nil {
  316. if v["luacontent"] != nil {
  317. script = v["luacontent"].(string)
  318. }
  319. } else {
  320. script = GetScriptByTmp(v)
  321. }
  322. }
  323. scriptSpider[fmt.Sprint(v["code"])] = map[string]string{
  324. "code": fmt.Sprint(v["code"]),
  325. "type": fmt.Sprint(v["state"]),
  326. "script": script,
  327. "createuser": fmt.Sprint(v["createuser"]),
  328. "createuseremail": fmt.Sprint(v["createuseremail"]),
  329. "modifyuser": fmt.Sprint(v["modifyuser"]),
  330. "modifyemail": fmt.Sprint(v["next"]),
  331. }
  332. }
  333. return scriptSpider
  334. }
  335. // 获取所有爬虫脚本--文件
  336. func getSpiderScriptFile(newscript bool) map[string]map[string]string {
  337. scriptSpider := map[string]map[string]string{}
  338. filespider := 0
  339. filepath.Walk("res", func(path string, info os.FileInfo, err error) error {
  340. if info.IsDir() {
  341. return nil
  342. } else if strings.HasPrefix(info.Name(), "spider_") &&
  343. strings.HasSuffix(info.Name(), ".lua") {
  344. //过滤test目录
  345. if strings.Contains(path, "\\test\\") {
  346. return nil
  347. }
  348. loadfile := true
  349. if newscript {
  350. if time.Now().Unix() < info.ModTime().Add(time.Duration(15)*time.Minute).Unix() {
  351. loadfile = true
  352. } else {
  353. loadfile = false
  354. }
  355. }
  356. if loadfile {
  357. f, err := os.Open(path)
  358. defer f.Close()
  359. if err != nil {
  360. logger.Error(err.Error())
  361. }
  362. buf := bufio.NewReader(f)
  363. script := ""
  364. code := ""
  365. for {
  366. line, err := buf.ReadString('\n')
  367. if code == "" && strings.Contains(line, "spiderCode=") {
  368. res := regcode.FindAllStringSubmatch(line, -1)
  369. if len(res) > 0 {
  370. code = res[0][1]
  371. //logger.Info("code", code)
  372. } else {
  373. break
  374. }
  375. }
  376. if scriptSpider[code] == nil {
  377. script = script + line + "\n"
  378. } else {
  379. break
  380. }
  381. if err != nil {
  382. break
  383. }
  384. }
  385. if code != "" && script != "" && scriptSpider[code] == nil {
  386. scriptSpider[code] = map[string]string{
  387. "code": code,
  388. "type": "5",
  389. "script": script,
  390. //脚本文件属性值空
  391. "createuser": "",
  392. "createuseremail": "",
  393. "modifyuser": "",
  394. "modifyemail": "",
  395. }
  396. filespider = filespider + 1
  397. //logger.Info("script", script)
  398. }
  399. }
  400. }
  401. return nil
  402. })
  403. logger.Info("节点", util.Config.Uploadevent, "脚本文件爬虫数", filespider)
  404. return scriptSpider
  405. }
  406. // 脚本下架、上架、重载
  407. func UpdateSpiderByCodeState(code, state string) (bool, error) {
  408. up := false
  409. var err error
  410. if state != "5" && state != "-1" { //脚本下架
  411. SpiderHeart.Delete(code) //脚本下架,删除脚本对应心跳
  412. logger.Info("下架脚本", code)
  413. if util.Config.Working == 1 { //队列模式
  414. for i, as := range []sync.Map{Allspiders, Allspiders2} {
  415. if i == 1 && util.Config.Modal == 0 { //队列模式原始模式采集Allspiders2无用(7700下架爬虫)
  416. break
  417. }
  418. tmp, b := as.Load(code)
  419. if b {
  420. sp, ok := tmp.(*Spider)
  421. if ok {
  422. sp.Stop = true
  423. }
  424. as.Delete(code)
  425. logger.Info("下架脚本,Allspiders删除")
  426. }
  427. }
  428. } else { //高性能模式
  429. for _, as := range []sync.Map{Allspiders, Allspiders2} {
  430. if tmp, ok := as.Load(code); ok {
  431. sp, ok := tmp.(*Spider)
  432. if ok {
  433. sp.Stop = true
  434. sp.L.Close()
  435. as.Delete(code)
  436. }
  437. }
  438. }
  439. }
  440. LoopListPath.Delete(code)
  441. logger.Info(code, "脚本下架成功")
  442. up = true
  443. err = nil
  444. } else if state == "-1" { //爬虫重采更新线上爬虫
  445. scriptMap := getSpiderScriptDB(code)
  446. logger.Info("更新线上脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
  447. if util.Config.Working == 1 { //排队模式
  448. for _, v := range scriptMap {
  449. listsize := 0
  450. listHas := false
  451. count_ok, count_no := 0, 0
  452. LoopListPath.Range(func(key, val interface{}) bool {
  453. listsize++
  454. if tmp, ok := val.(map[string]string); ok {
  455. count_ok++
  456. if tmp["code"] == code && key == code { //队列存在,重载脚本
  457. logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
  458. listHas = true
  459. LoopListPath.Store(key, v)
  460. UpdateHighListDataByCode(code) //爬虫更新上架后,重置数据state=0
  461. logger.Info("队列模式更新列表页信息状态", code)
  462. }
  463. } else {
  464. count_no++
  465. }
  466. return true
  467. })
  468. logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
  469. if !listHas { //队列不存在
  470. logger.Info("重采更新爬虫失败:", code)
  471. up = false
  472. err = errors.New("爬虫不在线:" + code)
  473. } else {
  474. up = true
  475. err = nil
  476. logger.Info("重采更新爬虫成功", code)
  477. }
  478. }
  479. } else { //高性能模式
  480. for k, v := range scriptMap {
  481. if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
  482. sp := spd.(*Spider)
  483. sp.ScriptFile = v["script"]
  484. sp.MUserName = v["modifyuser"]
  485. sp.MUserEmail = v["modifyemail"]
  486. Allspiders.Store(k, sp)
  487. up = true
  488. err = nil
  489. logger.Info("重采更新爬虫成功", sp.Code)
  490. } else { //不存在
  491. up = false
  492. err = errors.New("爬虫不在线:" + code)
  493. logger.Info("重采更新爬虫失败:", code)
  494. }
  495. //Allspiders2
  496. if spd2, ok2 := Allspiders2.Load(k); ok2 { //对应脚本已存在,更新
  497. sp2 := spd2.(*Spider)
  498. sp2.ScriptFile = v["script"]
  499. sp2.MUserName = v["modifyuser"]
  500. sp2.MUserEmail = v["modifyemail"]
  501. sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) //更新上架,重载脚本
  502. Allspiders2.Store(k, sp2)
  503. // up = true
  504. // err = nil
  505. logger.Info("Allspiders2重采更新爬虫成功", sp2.Code)
  506. } else { //不存在
  507. // up = false
  508. // err = errors.New("爬虫不在线:" + code)
  509. logger.Info("Allspiders2重采更新爬虫失败:", code)
  510. }
  511. }
  512. }
  513. } else { //脚本上架
  514. scriptMap := getSpiderScriptDB(code)
  515. logger.Info("上架新增脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
  516. if util.Config.Modal == 1 && !util.Config.IsHistoryEvent { //分开采集
  517. go UpdateHighListDataByCode(code)
  518. }
  519. if util.Config.Working == 1 { //排队模式
  520. for _, v := range scriptMap {
  521. LoopListPath.Store(code, v) //更新或新增爬虫信息
  522. listsize, count_ok, count_no := 0, 0, 0
  523. isOk := false
  524. LoopListPath.Range(func(key, val interface{}) bool {
  525. listsize++
  526. if tmp, ok := val.(map[string]string); ok {
  527. count_ok++
  528. if tmp["code"] == code && key == code { //队列存在
  529. isOk = true
  530. }
  531. } else {
  532. count_no++
  533. }
  534. return true
  535. })
  536. logger.Info("上架脚本", isOk, code)
  537. logger.Info("上架爬虫后队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
  538. if !isOk {
  539. return false, errors.New("use " + code + " failed")
  540. }
  541. up = true
  542. }
  543. } else { //高性能模式
  544. for k, v := range scriptMap {
  545. LoopListPath.Store(k, v)
  546. //1、Allspiders对应7000、7100、7400脚本上架下载数据(列表页爬虫集合)
  547. if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
  548. sp := spd.(*Spider)
  549. sp.ScriptFile = v["script"]
  550. UpdateSpider(sp, k, v["script"]) //爬虫其他信息更新
  551. //sp.LoadScript(&sp.Name, &sp.Channel, &sp.MUserName, k, sp.ScriptFile, true, false) //更新上架,重载脚本
  552. Allspiders.Store(k, sp)
  553. up = true
  554. err = nil
  555. logger.Info("上架重载脚本", sp.Code)
  556. } else { //新增脚本
  557. sp, errstr := CreateSpider(k, v["script"], true, false)
  558. if errstr == "" && sp != nil && sp.Code != "nil" {
  559. Allspiders.Store(k, sp)
  560. sp.StartJob()
  561. up = true
  562. err = nil
  563. logger.Info("上架新增脚本", sp.Code)
  564. } else {
  565. err = errors.New("新增失败")
  566. nowT := time.Now().Unix()
  567. MgoS.Update("spider_loadfail",
  568. map[string]interface{}{
  569. "code": k,
  570. "modifytime": map[string]interface{}{
  571. "$gte": nowT - 12*3600,
  572. "$lte": nowT + 12*3600,
  573. },
  574. },
  575. map[string]interface{}{
  576. "$set": map[string]interface{}{
  577. "code": k,
  578. "type": "新增初始化脚本",
  579. "script": v["script"],
  580. "updatetime": nowT,
  581. "modifyuser": sp.MUserName,
  582. "event": util.Config.Uploadevent,
  583. "err": errstr,
  584. },
  585. }, true, false)
  586. }
  587. }
  588. //2、Allspiders2对应7100、7110、7400上架采集三级页数据(Allspiders2三级页爬虫集合)
  589. if util.Config.Modal == 1 && !util.Config.IsHistoryEvent {
  590. //Allspiders2
  591. if spd2, ok2 := Allspiders2.Load(k); ok2 { //对应脚本已存在,更新
  592. sp2 := spd2.(*Spider)
  593. sp2.ScriptFile = v["script"]
  594. UpdateSpider(sp2, k, v["script"]) //爬虫其他信息更新
  595. sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) //更新上架,重载脚本
  596. Allspiders2.Store(k, sp2) //重载后放入集合
  597. // up = true
  598. // err = nil
  599. logger.Info("Allspiders2上架重载脚本", sp2.Code)
  600. } else { //新增脚本
  601. sp2, errstr := CreateSpider(k, v["script"], true, false)
  602. if errstr == "" && sp2 != nil && sp2.Code != "nil" {
  603. sp2.IsMainThread = true //多线程采集详情页时使用
  604. go sp2.DownloadHighDetail(true) //根据列表页数据下载三级页
  605. Allspiders2.Store(k, sp2)
  606. // up = true
  607. // err = nil
  608. logger.Info("Allspiders2上架新增脚本", sp2.Code)
  609. } /*else {
  610. err = errors.New("新增失败")
  611. mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
  612. "code": k,
  613. "type": "新增脚本失败",
  614. "script": v["script"],
  615. "intime": time.Now().Format(qu.Date_Full_Layout),
  616. "event": util.Config.Uploadevent,
  617. })
  618. }*/
  619. }
  620. }
  621. }
  622. }
  623. }
  624. logger.Info("上下架:", up, err)
  625. return up, err
  626. }
  627. // 定时重载脚本文件
  628. func ReloadSpiderFile() {
  629. scriptMap := getSpiderScriptFile(true)
  630. for k, v := range scriptMap {
  631. for i, as := range []sync.Map{Allspiders, Allspiders2} {
  632. if i == 1 && util.Config.Modal == 0 { //队列模式原始模式采集Allspiders2无用
  633. continue
  634. }
  635. if spd, ok := as.Load(k); ok { //对应脚本已存在,更新
  636. sp := spd.(*Spider)
  637. logger.Info("定时重载脚本", sp.Code)
  638. sp.ScriptFile = v["script"]
  639. sp.MUserName = v["modifyuser"]
  640. sp.MUserEmail = v["modifyemail"]
  641. as.Store(k, sp)
  642. } else { //新增脚本
  643. var sp *Spider
  644. var errstr string
  645. if util.Config.Working == 1 { //排队模式
  646. if i == 0 {
  647. //length := 0
  648. //LoopListPath.Range(func(k, v interface{}) bool {
  649. // length++
  650. // return true
  651. //})
  652. LoopListPath.Store(k, v) //排队模式Allspiders,Allspiders2共用一个LoopListPath,新增一次即可
  653. sp, errstr = CreateSpider(k, v["script"], false, false)
  654. } else {
  655. sp, errstr = CreateSpider(k, v["script"], true, false)
  656. }
  657. } else {
  658. sp, errstr = CreateSpider(k, v["script"], true, false)
  659. }
  660. if errstr == "" && sp != nil && sp.Code != "nil" {
  661. sp.MUserName = v["modifyuser"]
  662. sp.MUserEmail = v["modifyemail"]
  663. as.Store(k, sp)
  664. if util.Config.Working == 1 {
  665. sp.Stop = true
  666. // if i == 0 {
  667. // length := 0
  668. // LoopListPath.Range(func(k, v interface{}) bool {
  669. // length++
  670. // return true
  671. // })
  672. // LoopListPath.Store(length, v)
  673. // }
  674. } else {
  675. sp.Stop = false
  676. if i == 0 { //高性能模式只有Allspiders启动爬虫,Allspiders2只负责下三级页
  677. sp.StartJob()
  678. }
  679. }
  680. logger.Info("定时重载脚本--新增", sp.Code)
  681. } else {
  682. if i == 0 {
  683. nowT := time.Now().Unix()
  684. MgoS.Update("spider_loadfail",
  685. map[string]interface{}{
  686. "code": k,
  687. "modifytime": map[string]interface{}{
  688. "$gte": nowT - 12*3600,
  689. "$lte": nowT + 12*3600,
  690. },
  691. },
  692. map[string]interface{}{
  693. "$set": map[string]interface{}{
  694. "code": k,
  695. "type": "定时重载--新增失败",
  696. "script": v["script"],
  697. "updatetime": nowT,
  698. "modifyuser": sp.MUserName,
  699. "event": util.Config.Uploadevent,
  700. "err": errstr,
  701. },
  702. }, true, false)
  703. }
  704. }
  705. }
  706. }
  707. // if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
  708. // sp := spd.(*Spider)
  709. // logger.Info("定时重载脚本", sp.Code)
  710. // sp.ScriptFile = v["script"]
  711. // if v["createuser"] != "" {
  712. // sp.UserName = v["createuser"]
  713. // }
  714. // if v["createuseremail"] != "" {
  715. // sp.UserEmail = v["createuseremail"]
  716. // }
  717. // sp.MUserName = v["modifyuser"]
  718. // sp.MUserEmail = v["modifyemail"]
  719. // Allspiders.Store(k, sp)
  720. // } else { //新增脚本
  721. // var sp *Spider
  722. // if util.Config.Working == 1 { //排队模式
  723. // length := 0
  724. // LoopListPath.Range(func(k, v interface{}) bool {
  725. // length++
  726. // return true
  727. // })
  728. // LoopListPath.Store(length, v)
  729. // sp = CreateSpider(k, v["script"], false,false)
  730. // } else {
  731. // sp = NewSpider(k, v["script"])
  732. // }
  733. // if sp != nil && sp.Code != "nil" {
  734. // if v["createuser"] != "" {
  735. // sp.UserName = v["createuser"]
  736. // }
  737. // if v["createuseremail"] != "" {
  738. // sp.UserEmail = v["createuseremail"]
  739. // }
  740. // sp.MUserName = v["modifyuser"]
  741. // sp.MUserEmail = v["modifyemail"]
  742. // Allspiders.Store(k, sp)
  743. // if util.Config.Working == 1 {
  744. // sp.Stop = true
  745. // length := 0
  746. // LoopListPath.Range(func(k, v interface{}) bool {
  747. // length++
  748. // return true
  749. // })
  750. // LoopListPath.Store(length, v)
  751. // } else {
  752. // sp.Stop = false
  753. // sp.StartJob()
  754. // }
  755. // logger.Info("定时重载脚本--新增", sp.Code)
  756. // } else {
  757. // mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
  758. // "code": k,
  759. // "type": "定时重载--新增失败",
  760. // "script": v["script"],
  761. // "intime": time.Now().Format(qu.Date_Full_Layout),
  762. // "event": util.Config.Uploadevent,
  763. // })
  764. // }
  765. // }
  766. }
  767. util.TimeAfterFunc(time.Duration(15)*time.Minute, ReloadSpiderFile, TimeChan)
  768. }
  769. // 生成爬虫
  770. func CreateSpider(code, luafile string, newstate, thread bool) (*Spider, string) {
  771. defer qu.Catch()
  772. spider := &Spider{}
  773. err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate, thread)
  774. if err != "" {
  775. return nil, err
  776. }
  777. spider.Code = spider.GetVar("spiderCode")
  778. spider.SCode = spider.Code
  779. spider.Name = spider.GetVar("spiderName")
  780. spider.Channel = spider.GetVar("spiderChannel")
  781. //spider.LastExecTime = GetLastExectime(spider.Code)
  782. spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
  783. spider.Collection = spider.GetVar("spider2Collection")
  784. spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
  785. //spider.Thread = int64(spider.GetIntVar("spiderThread"))
  786. spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
  787. spider.StoreMode = spider.GetIntVar("spiderStoreMode")
  788. spider.CoverAttr = spider.GetVar("spiderCoverAttr")
  789. spiderSleepBase := spider.GetIntVar("spiderSleepBase")
  790. if spiderSleepBase == -1 {
  791. spider.SleepBase = 1000
  792. } else {
  793. spider.SleepBase = spiderSleepBase
  794. }
  795. spiderSleepRand := spider.GetIntVar("spiderSleepRand")
  796. if spiderSleepRand == -1 {
  797. spider.SleepRand = 1000
  798. } else {
  799. spider.SleepRand = spiderSleepRand
  800. }
  801. spiderTimeout := spider.GetIntVar("spiderTimeout")
  802. if spiderTimeout == -1 {
  803. spider.Timeout = 60
  804. } else {
  805. spider.Timeout = int64(spiderTimeout)
  806. }
  807. spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
  808. //spider.UserName = spider.GetVar("spiderUserName")
  809. //spider.UserEmail = spider.GetVar("spiderUserEmail")
  810. //spider.UploadTime = spider.GetVar("spiderUploadTime")
  811. spider.MUserName = spider.GetVar("spiderUserName")
  812. spider.MUserEmail = spider.GetVar("spiderUserEmail")
  813. //新增历史补漏
  814. //qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
  815. spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
  816. spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
  817. //新老爬虫
  818. spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
  819. //爬虫类型
  820. spider.Infoformat = spider.GetIntVar("spiderInfoformat")
  821. return spider, ""
  822. }
  823. // 更新爬虫
  824. func UpdateSpider(spider *Spider, code, script string) {
  825. ts := &Spider{}
  826. ts.Script.L = lua.NewState(lua.Options{
  827. RegistrySize: 256 * 20,
  828. CallStackSize: 256,
  829. IncludeGoStackTrace: false,
  830. })
  831. defer ts.L.Close()
  832. ts.Script.L.PreloadModule("http", gluahttp.NewHttpModule(&http.Client{}).Loader)
  833. ts.Script.L.PreloadModule("json", lujson.Loader)
  834. if err := ts.Script.L.DoString(script); err != nil {
  835. logger.Debug(code + ",加载lua脚本错误:" + err.Error())
  836. return
  837. }
  838. spider.Channel = ts.GetVar("spiderChannel") //栏目名称
  839. spider.DownDetail = ts.GetBoolVar("spiderDownDetailPage") //是否下三级页
  840. spider.Collection = ts.GetVar("spider2Collection") //存储表
  841. spider.SpiderRunRate = int64(ts.GetIntVar("spiderRunRate")) //间隔时间
  842. spider.StoreToMsgEvent = ts.GetIntVar("spiderStoreToMsgEvent") //4002
  843. spider.StoreMode = ts.GetIntVar("spiderStoreMode") //2
  844. spider.CoverAttr = ts.GetVar("spiderCoverAttr") //title
  845. //下载三级页(DownloadDetailPage)随机延迟
  846. spiderSleepBase := ts.GetIntVar("spiderSleepBase")
  847. if spiderSleepBase == -1 {
  848. spider.SleepBase = 1000
  849. } else {
  850. spider.SleepBase = spiderSleepBase
  851. }
  852. spiderSleepRand := ts.GetIntVar("spiderSleepRand")
  853. if spiderSleepRand == -1 {
  854. spider.SleepRand = 1000
  855. } else {
  856. spider.SleepRand = spiderSleepRand
  857. }
  858. spiderTimeout := ts.GetIntVar("spiderTimeout")
  859. if spiderTimeout == -1 {
  860. spider.Timeout = 60
  861. } else {
  862. spider.Timeout = int64(spiderTimeout)
  863. }
  864. spider.MUserName = spider.GetVar("spiderUserName")
  865. spider.MUserEmail = spider.GetVar("spiderUserEmail")
  866. spider.TargetChannelUrl = ts.GetVar("spiderTargetChannelUrl") //栏目地址
  867. //新增历史补漏
  868. spider.IsHistoricalMend = ts.GetBoolVar("spiderIsHistoricalMend")
  869. spider.IsMustDownload = ts.GetBoolVar("spiderIsMustDownload")
  870. //新老爬虫
  871. spider.IsCompete = ts.GetBoolVar("spiderIsCompete")
  872. //爬虫类型
  873. spider.Infoformat = spider.GetIntVar("spiderInfoformat")
  874. }
  875. // 多线程生成爬虫
  876. func NewSpiderForThread(code, luafile string) (*Spider, string) {
  877. defer qu.Catch()
  878. spider := &Spider{}
  879. err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, true)
  880. if err != "" {
  881. return nil, err
  882. }
  883. spider.Code = spider.GetVar("spiderCode")
  884. spider.SCode = spider.Code
  885. spider.Script.SCode = spider.Code
  886. spider.Name = spider.GetVar("spiderName")
  887. spider.Channel = spider.GetVar("spiderChannel")
  888. //spider.LastExecTime = GetLastExectime(spider.Code)
  889. spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
  890. spider.Collection = spider.GetVar("spider2Collection")
  891. spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
  892. //spider.Thread = int64(spider.GetIntVar("spiderThread"))
  893. spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
  894. spider.StoreMode = spider.GetIntVar("spiderStoreMode")
  895. spider.CoverAttr = spider.GetVar("spiderCoverAttr")
  896. spiderSleepBase := spider.GetIntVar("spiderSleepBase")
  897. if spiderSleepBase == -1 {
  898. spider.SleepBase = 1000
  899. } else {
  900. spider.SleepBase = spiderSleepBase
  901. }
  902. spiderSleepRand := spider.GetIntVar("spiderSleepRand")
  903. if spiderSleepRand == -1 {
  904. spider.SleepRand = 1000
  905. } else {
  906. spider.SleepRand = spiderSleepRand
  907. }
  908. spiderTimeout := spider.GetIntVar("spiderTimeout")
  909. if spiderTimeout == -1 {
  910. spider.Timeout = 60
  911. } else {
  912. spider.Timeout = int64(spiderTimeout)
  913. }
  914. spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
  915. //spider.UserName = spider.GetVar("spiderUserName")
  916. //spider.UserEmail = spider.GetVar("spiderUserEmail")
  917. //spider.UploadTime = spider.GetVar("spiderUploadTime")
  918. //新增历史补漏
  919. //qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
  920. spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
  921. spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
  922. //新老爬虫
  923. spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
  924. //爬虫类型
  925. spider.Infoformat = spider.GetIntVar("spiderInfoformat")
  926. return spider, ""
  927. }
  928. // 下载量入库
  929. func SaveDownCount(code string, addtotal bool, todayDowncount, todayRequestNum, yesterdayDowncount, yestoDayRequestNum int32) {
  930. date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
  931. updata := map[string]interface{}{}
  932. if addtotal {
  933. updata = map[string]interface{}{
  934. "$inc": map[string]interface{}{"totaldown": todayDowncount, "totalreq": todayRequestNum},
  935. "$set": map[string]interface{}{
  936. "yesdowncount": yesterdayDowncount,
  937. "yesdownreq": yestoDayRequestNum,
  938. "todaydowncount": todayDowncount,
  939. "todaydownreq": todayRequestNum,
  940. "date": date,
  941. "year": time.Now().Year(),
  942. "month": time.Now().Month(),
  943. "day": time.Now().Day(),
  944. },
  945. }
  946. } else {
  947. updata = map[string]interface{}{
  948. "$set": map[string]interface{}{
  949. "yesdowncount": yesterdayDowncount,
  950. "yesdownreq": yestoDayRequestNum,
  951. "todaydowncount": todayDowncount,
  952. "todaydownreq": todayRequestNum,
  953. "date": date,
  954. "year": time.Now().Year(),
  955. "month": time.Now().Month(),
  956. "day": time.Now().Day(),
  957. },
  958. }
  959. }
  960. MgoS.Update("spider_downlog", map[string]interface{}{"code": code, "date": date}, updata, true, false)
  961. }
  962. // 获取下载的上下限(没用)
  963. func GetLimitDownload(code string) (uplimit, lowlimit int) {
  964. defer qu.Catch()
  965. ret, _ := MgoS.FindOne("spider_ldtime", map[string]interface{}{"code": code})
  966. if ret != nil && len(*ret) > 0 {
  967. uplimit = qu.IntAll((*ret)["uplimit"])
  968. lowlimit = qu.IntAll((*ret)["lowlimit"])
  969. return uplimit, lowlimit
  970. } else {
  971. return 100, 0
  972. }
  973. }
  974. // 拼装脚本
  975. func GetScriptByTmp(luaconfig map[string]interface{}) string {
  976. defer qu.Catch()
  977. script := ""
  978. if luaconfig["listcheck"] == nil {
  979. luaconfig["listcheck"] = ""
  980. }
  981. if luaconfig["contentcheck"] == nil {
  982. luaconfig["contentcheck"] = ""
  983. }
  984. modifyUser := qu.ObjToString(luaconfig["modifyuser"])
  985. modifyUserEmail := qu.ObjToString(luaconfig["createuseremail"])
  986. if luaconfig != nil && len(luaconfig) > 0 {
  987. common := luaconfig["param_common"].([]interface{})
  988. //新增spiderIsHistoricalMend spiderIsMustDownload
  989. if len(common) == 15 {
  990. common = append(common, modifyUser, modifyUserEmail, "")
  991. } else {
  992. common = append(common, false, false, modifyUser, modifyUserEmail, "")
  993. }
  994. for k, v := range common {
  995. if k == 4 || k == 5 || k == 6 || k == 9 || k == 10 {
  996. common[k] = qu.IntAll(v)
  997. }
  998. }
  999. script, _ = GetTmpModel(map[string][]interface{}{"common": common})
  1000. //发布时间
  1001. script_time := ""
  1002. if qu.IntAll(luaconfig["type_time"]) == 0 { //向导模式
  1003. time := luaconfig["param_time"].([]interface{})
  1004. script_time, _ = GetTmpModel(map[string][]interface{}{
  1005. "time": time,
  1006. })
  1007. } else { //专家模式
  1008. script_time = luaconfig["str_time"].(string)
  1009. }
  1010. //列表页
  1011. script_list := ""
  1012. if qu.IntAll(luaconfig["type_list"]) == 0 { //向导模式
  1013. list := luaconfig["param_list"].([]interface{})
  1014. addrs := strings.Split(list[1].(string), "\n")
  1015. if len(addrs) > 0 {
  1016. for k, v := range addrs {
  1017. addrs[k] = "'" + v + "'"
  1018. }
  1019. list[1] = strings.Join(addrs, ",")
  1020. } else {
  1021. list[1] = ""
  1022. }
  1023. script_list, _ = GetTmpModel(map[string][]interface{}{
  1024. "list": list,
  1025. "listcheck": []interface{}{luaconfig["listcheck"]},
  1026. })
  1027. } else { //专家模式
  1028. script_list = luaconfig["str_list"].(string)
  1029. }
  1030. //三级页
  1031. script_content := ""
  1032. if qu.IntAll(luaconfig["type_content"]) == 0 { //向导模式
  1033. content := luaconfig["param_content"].([]interface{})
  1034. script_content, _ = GetTmpModel(map[string][]interface{}{
  1035. "content": content,
  1036. "contentcheck": []interface{}{luaconfig["contentcheck"]},
  1037. })
  1038. } else { //专家模式
  1039. script_content = luaconfig["str_content"].(string)
  1040. }
  1041. script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"], luaconfig["spidercompete"], luaconfig["infoformat"])
  1042. script += `
  1043. ` + script_time + `
  1044. ` + script_list + `
  1045. ` + script_content
  1046. script = ReplaceModel(script, common, luaconfig["model"].(map[string]interface{}))
  1047. }
  1048. return script
  1049. }
// GetTmpModel renders lua script fragments from the util.Tmp_* templates for
// each section present in param ("common", "time", "list", "content") and
// concatenates them into a single script string. A section whose parameter
// slice is too short is skipped and err records the complaint (later sections
// still run, so err holds the LAST problem). Any panic inside the body is
// captured by qu.Try and returned through err.
func GetTmpModel(param map[string][]interface{}) (script string, err interface{}) {
	qu.Try(func() {
		// param_common: base spider configuration block
		if param != nil && param["common"] != nil {
			if len(param["common"]) < 12 {
				err = "公共参数配置不全"
			} else {
				script = fmt.Sprintf(util.Tmp_common, param["common"]...)
			}
		}
		// Publish-time section
		if param != nil && param["time"] != nil {
			if len(param["time"]) < 3 {
				err = "方法:time-参数配置不全"
			} else {
				script += fmt.Sprintf(util.Tmp_pubtime, param["time"]...)
			}
		}
		// List-page section; listcheck[0] is prepended as the first template arg
		if param != nil && param["list"] != nil {
			if len(param["list"]) < 7 {
				err = "方法:list-参数配置不全"
			} else {
				list := []interface{}{param["listcheck"][0]}
				list = append(list, param["list"]...)
				script += fmt.Sprintf(util.Tmp_pagelist, list...)
				// Turn the #pageno# placeholder into a lua string splice.
				script = strings.Replace(script, "#pageno#", `"..tostring(pageno).."`, -1)
			}
		}
		// Detail-page section; contentcheck[0] is prepended likewise
		if param != nil && param["content"] != nil {
			if len(param["content"]) < 2 {
				err = "方法:content-参数配置不全"
			} else {
				content := []interface{}{param["contentcheck"][0]}
				content = append(content, param["content"]...)
				script += fmt.Sprintf(util.Tmp_content, content...)
			}
		}
	}, func(e interface{}) {
		err = e
	})
	return script, err
}
  1095. // 补充模型
  1096. func ReplaceModel(script string, comm []interface{}, model map[string]interface{}) string {
  1097. defer qu.Catch()
  1098. //补充通用信息
  1099. commstr := `item["spidercode"]="` + comm[0].(string) + `";`
  1100. commstr += `item["site"]="` + comm[1].(string) + `";`
  1101. commstr += `item["channel"]="` + comm[2].(string) + `";`
  1102. script = strings.Replace(script, "--Common--", commstr, -1)
  1103. //补充模型信息
  1104. modelstr := ""
  1105. for k, v := range model {
  1106. modelstr += `item["` + k + `"]="` + v.(string) + `";`
  1107. }
  1108. script = strings.Replace(script, "--Model--", modelstr, -1)
  1109. return script
  1110. }
  1111. // 爬虫信息提交编辑器(心跳)
  1112. func SpiderInfoSend() {
  1113. time.Sleep(15 * time.Second)
  1114. list := []interface{}{}
  1115. Allspiders.Range(func(key, value interface{}) bool {
  1116. v := value.(*Spider)
  1117. info := map[string]interface{}{}
  1118. info["code"] = v.Code
  1119. info["todayDowncount"] = v.TodayDowncount
  1120. info["toDayRequestNum"] = v.ToDayRequestNum
  1121. info["yesterdayDowncount"] = v.YesterdayDowncount
  1122. info["yestoDayRequestNum"] = v.YestoDayRequestNum
  1123. info["totalDowncount"] = v.TotalDowncount
  1124. info["totalRequestNum"] = v.TotalRequestNum
  1125. info["errorNum"] = v.ErrorNum
  1126. info["roundCount"] = v.RoundCount
  1127. info["runRate"] = v.SpiderRunRate
  1128. info["lastHeartbeat"] = v.LastHeartbeat
  1129. info["lastDowncount"] = v.LastDowncount
  1130. info["lstate"] = v.L.Status(v.L)
  1131. list = append(list, info)
  1132. return true
  1133. })
  1134. bs, _ := json.Marshal(list)
  1135. value := url.Values{
  1136. "data": []string{util.Se.EncodeString(string(bs))},
  1137. "type": []string{"info"},
  1138. }
  1139. _, err := http.PostForm(util.Config.Editoraddr, value)
  1140. if err != nil {
  1141. logger.Error("send to editor: ", err.Error())
  1142. }
  1143. util.TimeAfterFunc(5*time.Minute, SpiderInfoSend, TimeChan)
  1144. }
  1145. // 保存心跳信息
  1146. func SaveHeartInfo() {
  1147. time.Sleep(20 * time.Minute)
  1148. num := 0
  1149. SpiderHeart.Range(func(key, value interface{}) bool {
  1150. code := key.(string)
  1151. sp, spiderOk := LoopListPath.Load(code)
  1152. if spiderOk && sp != nil {
  1153. heart, heartOk := value.(*Heart)
  1154. if heartOk {
  1155. num++
  1156. update := []map[string]interface{}{}
  1157. update = append(update, map[string]interface{}{"code": code})
  1158. update = append(update, map[string]interface{}{"$set": map[string]interface{}{
  1159. "site": heart.Site,
  1160. "channel": heart.Channel,
  1161. "firstpage": heart.FirstPageHeart,
  1162. "list": heart.ListHeart,
  1163. "findlist": heart.FindListHeart,
  1164. "detail": heart.DetailHeart,
  1165. "detailexecute": heart.DetailExecuteHeart,
  1166. "modifyuser": heart.ModifyUser,
  1167. "event": util.Config.Uploadevent,
  1168. "updatetime": time.Now().Unix(),
  1169. "del": false,
  1170. }})
  1171. UpdataHeartCache <- update
  1172. }
  1173. } else {
  1174. SpiderHeart.Delete(key)
  1175. }
  1176. return true
  1177. })
  1178. logger.Info("更新心跳个数:", num)
  1179. time.AfterFunc(1*time.Second, SaveHeartInfo)
  1180. }
// SpiderCodeSendToEditor records that a spider on the 7000 node was moved to
// an incremental node, by writing an audit row into the luamovelog
// collection.
// NOTE(review): the actual HTTP notification to the editor (below) is
// commented out, so every row is persisted with ok:false.
func SpiderCodeSendToEditor(code string) {
	defer qu.Catch()
	MgoEB.Save("luamovelog", map[string]interface{}{
		"code":       code,
		"comeintime": time.Now().Unix(),
		"ok":         false,
	})
	//ok := false
	//for i := 1; i <= 3; i++ {
	//	logger.Info("Code:", code, " times:", i, " Send Move Event")
	//	list := []interface{}{}
	//	list = append(list, code)
	//	bs, _ := json.Marshal(list)
	//	value := url.Values{
	//		"data": []string{util.Se.EncodeString(string(bs))},
	//		"type": []string{"code"},
	//	}
	//	res, err := http.PostForm(util.Config.Editoraddr, value)
	//	if err != nil {
	//		logger.Error("Send To Editor For Move Event Failed,Code:", code)
	//	} else {
	//		if res != nil {
	//			res.Body.Close()
	//		}
	//		ok = true
	//		break
	//	}
	//}
	//logger.Info("Code:", code, " Send Move Event:", ok)
	//MgoEB.Save("luamovelog", map[string]interface{}{
	//	"code":       code,
	//	"comeintime": time.Now().Unix(),
	//	"type":       "sendfail",
	//	"ok":         ok,
	//})
}