handler.go

package spider

import (
"bufio"
"encoding/json"
"errors"
"fmt"
mu "mfw/util"
"net/http"
"net/url"
"os"
"path/filepath"
qu "qfw/util"
mgu "qfw/util/mongodbutil"
"regexp"
util "spiderutil"
"strings"
"sync"
"time"

"github.com/donnie4w/go-logger/logger"
"github.com/yuin/gopher-lua"
. "gopkg.in/mgo.v2/bson"
)
var SpiderHeart sync.Map  // spider heartbeats, keyed by spider code
var Allspiders sync.Map   // list-page spider pool
var Allspiders2 sync.Map  // detail-page spider pool
var LoopListPath sync.Map
//var ChanDels = map[int]string{}
//var lock sync.Mutex
var CC chan *lua.LState
var CC2 chan *lua.LState
var Chansize int
var regcode, _ = regexp.Compile(`="(.*)"`)
var InitCount int
var InitAllLuaOver = make(chan bool, 1) // signals that all scripts have finished loading
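// InitSpider loads all spider scripts (from MongoDB and from res/*.lua files) into
// LoopListPath and then starts them according to the configuration:
//   Config.Working == 0                    -> NoQueueScript (high-performance mode)
//   Config.Working != 0 && Config.Modal == 0 -> QueueUpScriptList only (original queue mode)
//   Config.Working != 0 && Config.Modal != 0 -> list pages and detail pages are collected separately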
func InitSpider() {
scriptMap := getSpiderScriptDB("all") // load spiders from the DB, initialize templates
scriptMapFile := getSpiderScriptFile(false)
for code, v := range scriptMap {
LoopListPath.Store(code, v)
InitCount++
}
for code, v := range scriptMapFile {
LoopListPath.Store(code, v)
InitCount++
}
if util.Config.Working == 0 {
NoQueueScript() // high-performance mode
} else {
if util.Config.Modal == 0 { // original mode: list and detail pages together
QueueUpScriptList()
} else { // list pages and detail pages collected separately
go QueueUpScriptList()   // energy-saving mode: list pages
go QueueUpScriptDetail() // energy-saving mode: detail pages
}
}
}
// NoQueueScript starts every spider immediately (high-performance mode).
func NoQueueScript() {
list := *mgu.Find("spider_ldtime", "spider", "spider", ``, nil, `{"code":1,"uplimit":1,"lowlimit":1}`, false, -1, -1)
LoopListPath.Range(func(key, temp interface{}) bool {
if info, ok := temp.(map[string]string); ok {
code := info["code"]
script := info["script"]
sp, errstr := NewSpider(code, script)
if errstr == "" && sp != nil && sp.Code != "nil" { // script loaded successfully
//sp.Index = qu.IntAll(key)
//sp2.Index = qu.IntAll(key)
if info["createuser"] != "" {
sp.UserName = info["createuser"]
}
if info["createuseremail"] != "" {
sp.UserEmail = info["createuseremail"]
}
sp.MUserName = info["modifyuser"]
sp.MUserEmail = info["modifyemail"]
Allspiders.Store(sp.Code, sp)
for _, tmp := range list {
if qu.ObjToString(tmp["code"]) == sp.Code {
sp.UpperLimit = qu.IntAll(tmp["uplimit"])
//sp2.UpperLimit = qu.IntAll(tmp["uplimit"])
sp.LowerLimit = qu.IntAll(tmp["lowlimit"])
//sp2.LowerLimit = qu.IntAll(tmp["lowlimit"])
break
}
}
if util.Config.Modal == 1 { // list pages and detail pages collected separately
sp2, _ := NewSpider(code, script)
sp2.UserName = sp.UserName
sp2.UserEmail = sp.UserEmail
sp2.MUserName = sp.MUserName
sp2.MUserEmail = sp.MUserEmail
sp2.IsMainThread = true // used for multi-threaded collection
Allspiders2.Store(sp.Code, sp2)
}
sp.StartJob()
//util.TimeSleepFunc(10*time.Millisecond, TimeSleepChan)
} else {
logger.Info(code, "脚本加载失败,请检查!")
nowT := time.Now().Unix()
username := "异常"
if sp != nil {
username = sp.MUserName
}
mgu.Update("spider_loadfail", "spider", "spider",
map[string]interface{}{
"code": code,
"modifytime": map[string]interface{}{
"$gte": nowT - 12*3600,
"$lte": nowT + 12*3600,
},
},
map[string]interface{}{
"$set": map[string]interface{}{
"code": code,
"type": "初始化",
"script": script,
"updatetime": nowT,
"modifyuser": username,
"event": util.Config.Uploadevent,
"err": errstr,
},
}, true, false)
}
time.Sleep(100 * time.Millisecond)
}
return true
})
InitAllLuaOver <- true // all spiders initialized
logger.Info("高性能模式:LUA加载完成")
numSpider := 0
Allspiders.Range(func(key, value interface{}) bool {
numSpider++
return true
})
logger.Info("总共加载脚本数:", numSpider)
}
// QueueUpScriptList downloads list-page data in queue (energy-saving) mode.
func QueueUpScriptList() {
logger.Info("节能模式列表页")
CC = make(chan *lua.LState, util.Config.Chansize)
for i := 0; i < util.Config.Chansize; i++ { // both pools, Allspiders and Allspiders2, are initialized, so threads are doubled
CC <- lua.NewState(lua.Options{
RegistrySize: 256 * 20,
CallStackSize: 256,
IncludeGoStackTrace: false,
})
}
for {
listLen, listNoLen, DelLen := 0, 0, 0
logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环", "初始化脚本数量:", InitCount)
LoopListPath.Range(func(key, temp interface{}) bool {
if info, ok := temp.(map[string]string); ok {
code := info["code"]
old_is_running := false
tmp, b := Allspiders.Load(code)
if b {
if sp_old, ok := tmp.(*Spider); ok {
if !sp_old.Stop {
old_is_running = true
}
}
}
logger.Info("Code:", code, "Is Downloading List:", old_is_running)
if !old_is_running { // only start spiders that are not already running
script := info["script"]
sp, errstr := NewSpider_New(code, script, false)
//logger.Info("初始化脚本是否成功:", sp != nil, e.Value)
if errstr == "" && sp != nil && sp.Code != "nil" { // script initialized successfully
//sp.Index = qu.IntAll(key)
sp.UserName = info["createuser"]
sp.UserEmail = info["createuseremail"]
sp.MUserName = info["modifyuser"]
sp.MUserEmail = info["modifyemail"]
Allspiders.Store(code, sp)
sp.StartJob()
} else {
nowT := time.Now().Unix()
username := "异常"
if sp != nil {
username = sp.MUserName
}
mgu.Update("spider_loadfail", "spider", "spider",
map[string]interface{}{
"code": code,
"modifytime": map[string]interface{}{
"$gte": nowT - 12*3600,
"$lte": nowT + 12*3600,
},
},
map[string]interface{}{
"$set": map[string]interface{}{
"code": code,
"type": "初始化",
"script": script,
"updatetime": nowT,
"modifyuser": username,
"event": util.Config.Uploadevent,
"err": errstr,
},
}, true, false)
}
if sp != nil && sp.IsHistoricalMend { // historical-mend spiders run once and are then removed
DelLen++
LoopListPath.Delete(key)
b := mgu.Update("luaconfig", "editor", "editor", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"state": 6}}, false, false)
logger.Debug("Delete History Code:", code, b)
}
}
listLen++
} else {
logger.Info("Code:", key, "Is Not Download List")
listNoLen++
}
time.Sleep(100 * time.Millisecond)
return true
})
time.Sleep(1 * time.Second)
count_ok, count_no := 0, 0
LoopListPath.Range(func(k, v interface{}) bool {
if v != nil {
count_ok++
} else {
count_no++
}
return true
})
InitCount = count_ok
logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环,列表长度,", listLen, listNoLen, "删除数量", DelLen, "执行完毕后数量统计:", count_ok, count_no)
}
}
// QueueUpScriptDetail downloads detail-page data in queue (energy-saving) mode.
func QueueUpScriptDetail() {
logger.Info("节能模式三级页")
chanSize := util.Config.DetailChansize
CC2 = make(chan *lua.LState, chanSize)
for i := 0; i < chanSize; i++ { // both pools, Allspiders and Allspiders2, are initialized, so threads are doubled
CC2 <- lua.NewState(lua.Options{
RegistrySize: 256 * 20,
CallStackSize: 256,
IncludeGoStackTrace: false,
})
}
for {
count_ok, count_no := 0, 0
logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环", "初始化脚本数量:", InitCount)
LoopListPath.Range(func(key, temp interface{}) bool {
if info, ok := temp.(map[string]string); ok {
count_ok++
code := info["code"]
old_is_running := false
tmp, b := Allspiders2.Load(code)
if b {
if sp_old, ok := tmp.(*Spider); ok {
if !sp_old.Stop {
old_is_running = true
}
}
}
logger.Info("Code:", code, "Is Downloading Detail:", old_is_running)
if !old_is_running { // only start spiders that are not already running
script := info["script"]
sp, errstr := NewSpider_New(code, script, true)
if errstr == "" && sp != nil && sp.Code != "nil" { // script initialized successfully
//sp.Index = qu.IntAll(key)
sp.UserName = info["createuser"]
sp.UserEmail = info["createuseremail"]
sp.MUserName = info["modifyuser"]
sp.MUserEmail = info["modifyemail"]
sp.IsMainThread = true
Allspiders2.Store(code, sp)
go sp.DownloadListDetail("list") // download detail-page data
}
}
} else {
logger.Info("Code:", key, "Is Not Download Detail")
count_no++
}
time.Sleep(100 * time.Millisecond)
return true
})
InitCount = count_ok
time.Sleep(1 * time.Second)
logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环完毕,数量统计:", count_ok, count_no)
}
}
// getSpiderScriptDB loads spider scripts from MongoDB; code == "all" loads every online script.
func getSpiderScriptDB(code string) map[string]map[string]string {
scriptSpider := map[string]map[string]string{}
query := ``
if code == "all" { // initialize all scripts
query = `{"state":5,"event":` + fmt.Sprint(util.Config.Uploadevent) + `}`
} else { // single script uploaded online via message
query = `{"code":"` + code + `","event":` + fmt.Sprint(util.Config.Uploadevent) + `}`
//query = `{"$or":[{"iupload":1},{"iupload":3}],"event":` + fmt.Sprint(util.Config.Uploadevent) + `,"modifytime":{"$gt":1502937042}}`
}
listdb := mgu.Find("luaconfig", "editor", "editor", query, `{"_id":-1}`, nil, false, -1, -1)
// temporary: historical attachments
//listdb := mgu.Find("luaconfig_test", "editor", "editor", query, `{"_id":-1}`, nil, false, -1, -1)
for _, v := range *listdb {
old := qu.IntAll(v["old_lua"])
script := ""
if old == 1 {
script = fmt.Sprint(v["luacontent"])
} else {
if v["oldlua"] != nil {
if v["luacontent"] != nil {
script = v["luacontent"].(string)
}
} else {
script = GetScriptByTmp(v)
}
}
scriptSpider[fmt.Sprint(v["code"])] = map[string]string{
"code": fmt.Sprint(v["code"]),
"type": fmt.Sprint(v["state"]),
"script": script,
"createuser": fmt.Sprint(v["createuser"]),
"createuseremail": fmt.Sprint(v["createuseremail"]),
"modifyuser": fmt.Sprint(v["modifyuser"]),
"modifyemail": fmt.Sprint(v["next"]),
}
}
return scriptSpider
}
// getSpiderScriptFile loads spider scripts from res/spider_*.lua files; when newscript
// is true, only files modified within the last 15 minutes are loaded.
func getSpiderScriptFile(newscript bool) map[string]map[string]string {
scriptSpider := map[string]map[string]string{}
filespider := 0
filepath.Walk("res", func(path string, info os.FileInfo, err error) error {
if err != nil { // info is nil when the walk itself failed for this path
return err
}
if info.IsDir() {
return nil
} else if strings.HasPrefix(info.Name(), "spider_") &&
strings.HasSuffix(info.Name(), ".lua") {
// skip the test directory
if strings.Contains(path, "\\test\\") {
return nil
}
loadfile := true
if newscript {
if time.Now().Unix() < info.ModTime().Add(time.Duration(15)*time.Minute).Unix() {
loadfile = true
} else {
loadfile = false
}
}
if loadfile {
f, err := os.Open(path)
if err != nil {
logger.Error(err.Error())
return nil // skip unreadable files instead of dereferencing a nil handle
}
defer f.Close()
buf := bufio.NewReader(f)
script := ""
code := ""
for {
line, err := buf.ReadString('\n')
if code == "" && strings.Contains(line, "spiderCode=") {
res := regcode.FindAllStringSubmatch(line, -1)
if len(res) > 0 {
code = res[0][1]
//logger.Info("code", code)
} else {
break
}
}
if scriptSpider[code] == nil {
script = script + line + "\n"
} else {
break
}
if err != nil {
break
}
}
if code != "" && script != "" && scriptSpider[code] == nil {
scriptSpider[code] = map[string]string{
"code": code,
"type": "5",
"script": script,
// file-based scripts have no user attributes
"createuser": "",
"createuseremail": "",
"modifyuser": "",
"modifyemail": "",
}
filespider = filespider + 1
//logger.Info("script", script)
}
}
}
return nil
})
logger.Info("节点", util.Config.Uploadevent, "脚本文件爬虫数", filespider)
return scriptSpider
}
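// State values handled by UpdateSpiderByCodeState (taken from the branches below):
//   "5"   -> put the script online (load or reload it, then start it)
//   "-1"  -> reload an already-online script for re-collection
//   other -> take the script offline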
// UpdateSpiderByCodeState takes a script offline, puts it online, or reloads it.
func UpdateSpiderByCodeState(code, state string) (bool, error) {
up := false
var err error
if state != "5" && state != "-1" { // take the script offline
SpiderHeart.Delete(code) // offline: drop the spider's heartbeat
logger.Info("下架脚本", code)
if util.Config.Working == 1 { // queue mode
for i, as := range []*sync.Map{&Allspiders, &Allspiders2} { // iterate over pointers: sync.Map must not be copied
if i == 1 && util.Config.Modal == 0 { // Allspiders2 is unused in the original queue mode (7700 offline)
continue
}
tmp, b := as.Load(code)
if b {
sp, ok := tmp.(*Spider)
if ok {
if !sp.Stop { // still running: flag it to stop
sp.Stop = true
}
}
as.Delete(code)
logger.Info("下架脚本,Allspiders删除")
}
}
//LoopListPath.Range(func(k, v interface{}) bool {
// //if v != nil {
// // info, _ := v.(map[string]string)
// // if info["code"] == code {
// // LoopListPath.Store(k, nil)
// // lock.Lock()
// // defer lock.Unlock()
// // ChanDels[qu.IntAll(k)] = code
// // logger.Info("下架脚本,LoopListPath更新为nil,ChanDels中位置:", k)
// // }
// //}
// if k == code {
// LoopListPath.Delete(k)
// logger.Info(code, "脚本下架成功")
// return false //跳出循环
// }
// return true
//})
} else { // high-performance mode
for _, as := range []*sync.Map{&Allspiders, &Allspiders2} {
if tmp, ok := as.Load(code); ok {
sp, ok := tmp.(*Spider)
if ok {
sp.Stop = true
sp.L.Close()
as.Delete(code)
}
}
}
}
LoopListPath.Delete(code)
logger.Info(code, "脚本下架成功")
up = true
err = nil
} else if state == "-1" { // reload the online spider for re-collection
scriptMap := getSpiderScriptDB(code)
logger.Info("更新线上脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
if util.Config.Working == 1 { // queue mode
for _, v := range scriptMap {
listsize := 0
listHas := false
count_ok, count_no := 0, 0
LoopListPath.Range(func(key, val interface{}) bool {
listsize++
if tmp, ok := val.(map[string]string); ok {
count_ok++
if tmp["code"] == code && key == code { // already in the queue: reload the script
logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
listHas = true
LoopListPath.Store(key, v)
UpdateHighListDataByCode(code) // after the update, reset list data to state=0
logger.Info("队列模式更新列表页信息状态", code)
}
} else {
count_no++
}
return true
})
logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
if !listHas { // not in the queue
logger.Info("重采更新爬虫失败:", code)
up = false
err = errors.New("爬虫不在线:" + code)
} else {
up = true
err = nil
logger.Info("重采更新爬虫成功", code)
}
}
} else { // high-performance mode
for k, v := range scriptMap {
if spd, ok := Allspiders.Load(k); ok { // script already loaded: update it
sp := spd.(*Spider)
sp.ScriptFile = v["script"]
if v["createuser"] != "" {
sp.UserName = v["createuser"]
}
if v["createuseremail"] != "" {
sp.UserEmail = v["createuseremail"]
}
sp.MUserName = v["modifyuser"]
sp.MUserEmail = v["modifyemail"]
Allspiders.Store(k, sp)
up = true
err = nil
logger.Info("重采更新爬虫成功", sp.Code)
} else { // not loaded
up = false
err = errors.New("爬虫不在线:" + code)
logger.Info("重采更新爬虫失败:", code)
}
//Allspiders2
if spd2, ok2 := Allspiders2.Load(k); ok2 { // script already loaded: update it
sp2 := spd2.(*Spider)
sp2.ScriptFile = v["script"]
if v["createuser"] != "" {
sp2.UserName = v["createuser"]
}
if v["createuseremail"] != "" {
sp2.UserEmail = v["createuseremail"]
}
sp2.MUserName = v["modifyuser"]
sp2.MUserEmail = v["modifyemail"]
sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) // reload the script on update
Allspiders2.Store(k, sp2)
// up = true
// err = nil
logger.Info("Allspiders2重采更新爬虫成功", sp2.Code)
} else { // not loaded
// up = false
// err = errors.New("爬虫不在线:" + code)
logger.Info("Allspiders2重采更新爬虫失败:", code)
}
}
}
} else { // put the script online
scriptMap := getSpiderScriptDB(code)
logger.Info("上架新增脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
if util.Config.Working == 1 { // queue mode
for _, v := range scriptMap {
listsize := 0
listHas := false
count_ok, count_no := 0, 0
LoopListPath.Range(func(key, val interface{}) bool {
listsize++
if tmp, ok := val.(map[string]string); ok { // the assertion only exists to compute count_ok; checking key == code would suffice
count_ok++
if tmp["code"] == code && code == key { // already in the queue: reload the script
logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
listHas = true
LoopListPath.Store(key, v)
UpdateHighListDataByCode(code) // after the update, reset list data to state=0
logger.Info("队列模式更新列表页信息状态", code)
}
} else {
count_no++
}
return true
})
logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
if !listHas { // not in the queue yet: add it
logger.Info("上架新增脚本,队列中不存在")
LoopListPath.Store(code, v) // online
// lock.Lock()
// defer lock.Unlock()
// if len(ChanDels) > 0 {
// for i, _ := range ChanDels {
// logger.Info("上架新增脚本,替补队列中位置", i)
// LoopListPath.Store(i, v)
// delete(ChanDels, i)
// break
// }
// } else {
// logger.Info("上架新增脚本,新增队列中位置", listsize)
// LoopListPath.Store(listsize, v) //上架
// }
// verify that the script actually went online
saveList := false // whether going online succeeded
listsize, count_ok, count_no = 0, 0, 0
LoopListPath.Range(func(key, val interface{}) bool {
listsize++
if tmp, ok := val.(map[string]string); ok {
count_ok++
if tmp["code"] == code && key == code { // found in the queue
saveList = true
logger.Info("上架脚本成功", code)
}
} else {
count_no++
}
return true
})
logger.Info("上架爬虫后队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
if !saveList { // going online failed
logger.Info("上架脚本", code, " 失败")
return false, errors.New("use " + code + " failed")
}
}
logger.Info("上架新增脚本", code)
up = true
}
} else { // high-performance mode
for k, v := range scriptMap {
LoopListPath.Store(k, v)
// 1. Allspiders (list-page pool) handles going online for events 7000/7100/7400
if spd, ok := Allspiders.Load(k); ok { // script already loaded: update it
sp := spd.(*Spider)
sp.ScriptFile = v["script"]
if v["createuser"] != "" {
sp.UserName = v["createuser"]
}
if v["createuseremail"] != "" {
sp.UserEmail = v["createuseremail"]
}
sp.MUserName = v["modifyuser"]
sp.MUserEmail = v["modifyemail"]
//sp.LoadScript(k, sp.ScriptFile, true) //更新上架,重载脚本
Allspiders.Store(k, sp)
up = true
err = nil
logger.Info("上架重载脚本", sp.Code)
} else { // new script
sp, errstr := NewSpider(k, v["script"])
if errstr == "" && sp != nil && sp.Code != "nil" {
if v["createuser"] != "" {
sp.UserName = v["createuser"]
}
if v["createuseremail"] != "" {
sp.UserEmail = v["createuseremail"]
}
sp.MUserName = v["modifyuser"]
sp.MUserEmail = v["modifyemail"]
Allspiders.Store(k, sp)
sp.Stop = false
sp.StartJob()
up = true
err = nil
logger.Info("上架新增脚本", sp.Code)
} else {
err = errors.New("新增失败")
nowT := time.Now().Unix()
username := "异常"
if sp != nil { // sp is nil when the script failed to load
username = sp.MUserName
}
mgu.Update("spider_loadfail", "spider", "spider",
map[string]interface{}{
"code": k,
"modifytime": map[string]interface{}{
"$gte": nowT - 12*3600,
"$lte": nowT + 12*3600,
},
},
map[string]interface{}{
"$set": map[string]interface{}{
"code": k,
"type": "新增初始化脚本",
"script": v["script"],
"updatetime": nowT,
"modifyuser": username,
"event": util.Config.Uploadevent,
"err": errstr,
},
}, true, false)
}
}
// 2. Allspiders2 (detail-page pool) handles detail collection for events 7100/7110/7400
if util.Config.Modal == 1 { // the old high-performance mode does not collect detail pages from list data (7000, 7410)
//Allspiders2
if spd2, ok2 := Allspiders2.Load(k); ok2 { // script already loaded: update it
sp2 := spd2.(*Spider)
sp2.ScriptFile = v["script"]
if v["createuser"] != "" {
sp2.UserName = v["createuser"]
}
if v["createuseremail"] != "" {
sp2.UserEmail = v["createuseremail"]
}
sp2.MUserName = v["modifyuser"]
sp2.MUserEmail = v["modifyemail"]
sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) // reload the script on update
Allspiders2.Store(k, sp2) // put back into the pool after the reload
UpdateHighListDataByCode(k) // after the update, reset list data to state=0
// up = true
// err = nil
logger.Info("Allspiders2上架重载脚本", sp2.Code)
} else { // new script
sp2, errstr := NewSpider(k, v["script"])
if errstr == "" && sp2 != nil && sp2.Code != "nil" {
if v["createuser"] != "" {
sp2.UserName = v["createuser"]
}
if v["createuseremail"] != "" {
sp2.UserEmail = v["createuseremail"]
}
sp2.MUserName = v["modifyuser"]
sp2.MUserEmail = v["modifyemail"]
sp2.Stop = false
sp2.IsMainThread = true // used for multi-threaded collection
go sp2.DownloadHighDetail("highlist") // download detail pages driven by list data
Allspiders2.Store(k, sp2)
// up = true
// err = nil
logger.Info("Allspiders2上架新增脚本", sp2.Code)
} /*else {
err = errors.New("新增失败")
mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
"code": k,
"type": "新增脚本失败",
"script": v["script"],
"intime": time.Now().Format(qu.Date_Full_Layout),
"event": util.Config.Uploadevent,
})
}*/
}
}
}
}
}
logger.Info("上下架:", up, err)
return up, err
}
// ReloadSpiderFile periodically reloads spider script files (re-scheduled every 15 minutes).
func ReloadSpiderFile() {
scriptMap := getSpiderScriptFile(true)
for k, v := range scriptMap {
for i, as := range []*sync.Map{&Allspiders, &Allspiders2} { // iterate over pointers: sync.Map must not be copied
if i == 1 && util.Config.Modal == 0 { // Allspiders2 is unused in the original queue mode
continue
}
if spd, ok := as.Load(k); ok { // script already loaded: update it
sp := spd.(*Spider)
logger.Info("定时重载脚本", sp.Code)
sp.ScriptFile = v["script"]
if v["createuser"] != "" {
sp.UserName = v["createuser"]
}
if v["createuseremail"] != "" {
sp.UserEmail = v["createuseremail"]
}
sp.MUserName = v["modifyuser"]
sp.MUserEmail = v["modifyemail"]
as.Store(k, sp)
} else { // new script
var sp *Spider
var errstr string
if util.Config.Working == 1 { // queue mode
if i == 0 {
//length := 0
//LoopListPath.Range(func(k, v interface{}) bool {
// length++
// return true
//})
LoopListPath.Store(k, v) // Allspiders and Allspiders2 share one LoopListPath in queue mode; store it once
sp, errstr = NewSpider_New(k, v["script"], false)
} else {
sp, errstr = NewSpider_New(k, v["script"], true)
}
} else {
sp, errstr = NewSpider(k, v["script"])
}
if errstr == "" && sp != nil && sp.Code != "nil" {
if v["createuser"] != "" {
sp.UserName = v["createuser"]
}
if v["createuseremail"] != "" {
sp.UserEmail = v["createuseremail"]
}
sp.MUserName = v["modifyuser"]
sp.MUserEmail = v["modifyemail"]
as.Store(k, sp)
if util.Config.Working == 1 {
sp.Stop = true
// if i == 0 {
// length := 0
// LoopListPath.Range(func(k, v interface{}) bool {
// length++
// return true
// })
// LoopListPath.Store(length, v)
// }
} else {
sp.Stop = false
if i == 0 { // only Allspiders starts jobs in high-performance mode; Allspiders2 only downloads detail pages
sp.StartJob()
}
}
logger.Info("定时重载脚本--新增", sp.Code)
} else {
if i == 0 {
nowT := time.Now().Unix()
username := "异常"
if sp != nil { // sp may be nil when the script failed to load
username = sp.MUserName
}
mgu.Update("spider_loadfail", "spider", "spider",
map[string]interface{}{
"code": k,
"modifytime": map[string]interface{}{
"$gte": nowT - 12*3600,
"$lte": nowT + 12*3600,
},
},
map[string]interface{}{
"$set": map[string]interface{}{
"code": k,
"type": "定时重载--新增失败",
"script": v["script"],
"updatetime": nowT,
"modifyuser": username,
"event": util.Config.Uploadevent,
"err": errstr,
},
}, true, false)
}
}
}
}
// if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
// sp := spd.(*Spider)
// logger.Info("定时重载脚本", sp.Code)
// sp.ScriptFile = v["script"]
// if v["createuser"] != "" {
// sp.UserName = v["createuser"]
// }
// if v["createuseremail"] != "" {
// sp.UserEmail = v["createuseremail"]
// }
// sp.MUserName = v["modifyuser"]
// sp.MUserEmail = v["modifyemail"]
// Allspiders.Store(k, sp)
// } else { //新增脚本
// var sp *Spider
// if util.Config.Working == 1 { //排队模式
// length := 0
// LoopListPath.Range(func(k, v interface{}) bool {
// length++
// return true
// })
// LoopListPath.Store(length, v)
// sp = NewSpider_New(k, v["script"], false)
// } else {
// sp = NewSpider(k, v["script"])
// }
// if sp != nil && sp.Code != "nil" {
// if v["createuser"] != "" {
// sp.UserName = v["createuser"]
// }
// if v["createuseremail"] != "" {
// sp.UserEmail = v["createuseremail"]
// }
// sp.MUserName = v["modifyuser"]
// sp.MUserEmail = v["modifyemail"]
// Allspiders.Store(k, sp)
// if util.Config.Working == 1 {
// sp.Stop = true
// length := 0
// LoopListPath.Range(func(k, v interface{}) bool {
// length++
// return true
// })
// LoopListPath.Store(length, v)
// } else {
// sp.Stop = false
// sp.StartJob()
// }
// logger.Info("定时重载脚本--新增", sp.Code)
// } else {
// mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
// "code": k,
// "type": "定时重载--新增失败",
// "script": v["script"],
// "intime": time.Now().Format(qu.Date_Full_Layout),
// "event": util.Config.Uploadevent,
// })
// }
// }
}
util.TimeAfterFunc(time.Duration(15)*time.Minute, ReloadSpiderFile, TimeChan)
}
// NewSpider_New builds a spider for queue mode.
func NewSpider_New(code, luafile string, newstate bool) (*Spider, string) {
defer mu.Catch()
spider := &Spider{}
err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate, false)
if err != "" {
return nil, err
}
spider.Code = spider.GetVar("spiderCode")
spider.Script.SCode = spider.Code
spider.Name = spider.GetVar("spiderName")
spider.Channel = spider.GetVar("spiderChannel")
//spider.LastExecTime = GetLastExectime(spider.Code)
spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
spider.Collection = spider.GetVar("spider2Collection")
spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
spider.StoreMode = spider.GetIntVar("spiderStoreMode")
spider.CoverAttr = spider.GetVar("spiderCoverAttr")
spiderSleepBase := spider.GetIntVar("spiderSleepBase")
if spiderSleepBase == -1 {
spider.SleepBase = 1000
} else {
spider.SleepBase = spiderSleepBase
}
spiderSleepRand := spider.GetIntVar("spiderSleepRand")
if spiderSleepRand == -1 {
spider.SleepRand = 1000
} else {
spider.SleepRand = spiderSleepRand
}
spiderTimeout := spider.GetIntVar("spiderTimeout")
if spiderTimeout == -1 {
spider.Timeout = 60
} else {
spider.Timeout = int64(spiderTimeout)
}
spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
if v, ok := Allspiders.Load(spider.Code); ok {
sp := v.(*Spider)
spider.TodayDowncount = sp.TodayDowncount
spider.ToDayRequestNum = sp.ToDayRequestNum
spider.YesterdayDowncount = sp.YesterdayDowncount
spider.YestoDayRequestNum = sp.YestoDayRequestNum
spider.TotalDowncount = sp.TotalDowncount
spider.TotalRequestNum = sp.TotalRequestNum
spider.ErrorNum = sp.ErrorNum
spider.RoundCount = sp.RoundCount
}
spider.UserName = spider.GetVar("spiderUserName")
spider.UserEmail = spider.GetVar("spiderUserEmail")
spider.UploadTime = spider.GetVar("spiderUploadTime")
// historical-mend flags
spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
// new/old (compete) spider flag
spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
return spider, ""
}
// NewSpider builds a spider for high-performance mode.
func NewSpider(code, luafile string) (*Spider, string) {
defer mu.Catch()
spider := &Spider{}
err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, false)
if err != "" {
return nil, err
}
spider.Code = spider.GetVar("spiderCode")
spider.SCode = spider.Code
spider.Name = spider.GetVar("spiderName")
spider.Channel = spider.GetVar("spiderChannel")
//spider.LastExecTime = GetLastExectime(spider.Code)
spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
spider.Collection = spider.GetVar("spider2Collection")
spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
//spider.Thread = int64(spider.GetIntVar("spiderThread"))
spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
spider.StoreMode = spider.GetIntVar("spiderStoreMode")
spider.CoverAttr = spider.GetVar("spiderCoverAttr")
spiderSleepBase := spider.GetIntVar("spiderSleepBase")
if spiderSleepBase == -1 {
spider.SleepBase = 1000
} else {
spider.SleepBase = spiderSleepBase
}
spiderSleepRand := spider.GetIntVar("spiderSleepRand")
if spiderSleepRand == -1 {
spider.SleepRand = 1000
} else {
spider.SleepRand = spiderSleepRand
}
spiderTimeout := spider.GetIntVar("spiderTimeout")
if spiderTimeout == -1 {
spider.Timeout = 60
} else {
spider.Timeout = int64(spiderTimeout)
}
spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
tmp := GetDownloadLast(spider.Code, date)
if len(tmp) > 0 {
spider.TodayDowncount = int32(qu.IntAll(tmp["todaydowncount"]))
spider.ToDayRequestNum = int32(qu.IntAll(tmp["todaydownreq"]))
spider.YesterdayDowncount = int32(qu.IntAll(tmp["yesdowncount"]))
spider.YestoDayRequestNum = int32(qu.IntAll(tmp["yesdownreq"]))
spider.TotalDowncount = spider.TodayDowncount + int32(qu.IntAll(tmp["totaldown"]))
spider.TotalRequestNum = spider.ToDayRequestNum + int32(qu.IntAll(tmp["totalreq"]))
}
spider.UserName = spider.GetVar("spiderUserName")
spider.UserEmail = spider.GetVar("spiderUserEmail")
spider.UploadTime = spider.GetVar("spiderUploadTime")
// historical-mend flags
//qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
// new/old (compete) spider flag
spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
return spider, ""
}
// NewSpiderForThread builds a spider for multi-threaded collection.
func NewSpiderForThread(code, luafile string) (*Spider, string) {
defer mu.Catch()
spider := &Spider{}
err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, true)
if err != "" {
return nil, err
}
spider.Code = spider.GetVar("spiderCode")
spider.SCode = spider.Code
spider.Script.SCode = spider.Code
spider.Name = spider.GetVar("spiderName")
spider.Channel = spider.GetVar("spiderChannel")
//spider.LastExecTime = GetLastExectime(spider.Code)
spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
spider.Collection = spider.GetVar("spider2Collection")
spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
//spider.Thread = int64(spider.GetIntVar("spiderThread"))
spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
spider.StoreMode = spider.GetIntVar("spiderStoreMode")
spider.CoverAttr = spider.GetVar("spiderCoverAttr")
spiderSleepBase := spider.GetIntVar("spiderSleepBase")
if spiderSleepBase == -1 {
spider.SleepBase = 1000
} else {
spider.SleepBase = spiderSleepBase
}
spiderSleepRand := spider.GetIntVar("spiderSleepRand")
if spiderSleepRand == -1 {
spider.SleepRand = 1000
} else {
spider.SleepRand = spiderSleepRand
}
spiderTimeout := spider.GetIntVar("spiderTimeout")
if spiderTimeout == -1 {
spider.Timeout = 60
} else {
spider.Timeout = int64(spiderTimeout)
}
spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
spider.UserName = spider.GetVar("spiderUserName")
spider.UserEmail = spider.GetVar("spiderUserEmail")
spider.UploadTime = spider.GetVar("spiderUploadTime")
// historical-mend flags
//qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
// new/old (compete) spider flag
spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
return spider, ""
}
// SaveDownCount persists download counters for a spider; addtotal also increments the running totals.
func SaveDownCount(code string, addtotal bool, todayDowncount, todayRequestNum, yesterdayDowncount, yestoDayRequestNum int32) {
date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
updata := M{}
if addtotal {
updata = M{
"$inc": M{"totaldown": todayDowncount, "totalreq": todayRequestNum},
"$set": M{
"yesdowncount": yesterdayDowncount,
"yesdownreq": yestoDayRequestNum,
"todaydowncount": todayDowncount,
"todaydownreq": todayRequestNum,
"date": date,
"year": time.Now().Year(),
"month": time.Now().Month(),
"day": time.Now().Day(),
},
}
} else {
updata = M{
"$set": M{
"yesdowncount": yesterdayDowncount,
"yesdownreq": yestoDayRequestNum,
"todaydowncount": todayDowncount,
"todaydownreq": todayRequestNum,
"date": date,
"year": time.Now().Year(),
"month": time.Now().Month(),
"day": time.Now().Day(),
},
}
}
mgu.Update("spider_downlog", "spider", "spider", M{"code": code, "date": date}, updata, true, false)
}
// GetLimitDownload returns the download upper/lower limits for a spider (currently unused).
func GetLimitDownload(code string) (uplimit, lowlimit int) {
defer mu.Catch()
ret := mgu.FindOne("spider_ldtime", "spider", "spider", `{"code":"`+code+`"}`)
if *ret != nil {
uplimit = qu.IntAll((*ret)["uplimit"])
lowlimit = qu.IntAll((*ret)["lowlimit"])
return uplimit, lowlimit
} else {
return 100, 0
}
}
// GetScriptByTmp assembles a Lua script from a template configuration document.
func GetScriptByTmp(luaconfig map[string]interface{}) string {
defer mu.Catch()
script := ""
if luaconfig["listcheck"] == nil {
luaconfig["listcheck"] = ""
}
if luaconfig["contentcheck"] == nil {
luaconfig["contentcheck"] = ""
}
if luaconfig != nil && len(luaconfig) > 0 {
common := luaconfig["param_common"].([]interface{})
// newly added spiderIsHistoricalMend / spiderIsMustDownload fields
if len(common) == 15 {
common = append(common, "", "", "")
} else {
common = append(common, false, false, "", "", "")
}
for k, v := range common {
if k == 4 || k == 5 || k == 6 || k == 9 || k == 10 {
common[k] = qu.IntAll(v)
}
}
script, _ = GetTmpModel(map[string][]interface{}{"common": common})
script_time := ""
if luaconfig["type_time"] == 0 {
time := luaconfig["param_time"].([]interface{})
script_time, _ = GetTmpModel(map[string][]interface{}{
"time": time,
})
} else {
script_time = luaconfig["str_time"].(string)
}
script_list := ""
if luaconfig["type_list"] == 0 {
list := luaconfig["param_list"].([]interface{})
addrs := strings.Split(list[1].(string), "\n")
if len(addrs) > 0 {
for k, v := range addrs {
addrs[k] = "'" + v + "'"
}
list[1] = strings.Join(addrs, ",")
} else {
list[1] = ""
}
script_list, _ = GetTmpModel(map[string][]interface{}{
"list": list,
"listcheck": []interface{}{luaconfig["listcheck"]},
})
} else {
script_list = luaconfig["str_list"].(string)
}
script_content := ""
if luaconfig["type_content"] == 0 {
content := luaconfig["param_content"].([]interface{})
script_content, _ = GetTmpModel(map[string][]interface{}{
"content": content,
"contentcheck": []interface{}{luaconfig["contentcheck"]},
})
} else {
script_content = luaconfig["str_content"].(string)
}
script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"], luaconfig["spidercompete"])
script += `
` + script_time + `
` + script_list + `
` + script_content
script = ReplaceModel(script, common, luaconfig["model"].(map[string]interface{}))
}
return script
}
// GetTmpModel renders the spider script template sections from parameters.
func GetTmpModel(param map[string][]interface{}) (script string, err interface{}) {
qu.Try(func() {
if param != nil && param["common"] != nil {
if len(param["common"]) < 12 {
err = "公共参数配置不全"
} else {
script = fmt.Sprintf(util.Tmp_common, param["common"]...)
}
}
if param != nil && param["time"] != nil {
if len(param["time"]) < 3 {
err = "方法:time-参数配置不全"
} else {
script += fmt.Sprintf(util.Tmp_pubtime, param["time"]...)
}
}
if param != nil && param["list"] != nil {
if len(param["list"]) < 7 {
err = "方法:list-参数配置不全"
} else {
list := []interface{}{param["listcheck"][0]}
list = append(list, param["list"]...)
script += fmt.Sprintf(util.Tmp_pagelist, list...)
script = strings.Replace(script, "#pageno#", `"..tostring(pageno).."`, -1)
}
}
if param != nil && param["content"] != nil {
if len(param["content"]) < 2 {
err = "方法:content-参数配置不全"
} else {
content := []interface{}{param["contentcheck"][0]}
content = append(content, param["content"]...)
script += fmt.Sprintf(util.Tmp_content, content...)
}
}
}, func(e interface{}) {
err = e
})
return script, err
}
// ReplaceModel injects the common fields and model fields into the generated script.
func ReplaceModel(script string, comm []interface{}, model map[string]interface{}) string {
defer mu.Catch()
// fill in common info
commstr := `item["spidercode"]="` + comm[0].(string) + `";`
commstr += `item["site"]="` + comm[1].(string) + `";`
commstr += `item["channel"]="` + comm[2].(string) + `";`
script = strings.Replace(script, "--Common--", commstr, -1)
// fill in model info
modelstr := ""
for k, v := range model {
modelstr += `item["` + k + `"]="` + v.(string) + `";`
}
script = strings.Replace(script, "--Model--", modelstr, -1)
return script
}
// SpiderInfoSend reports spider statistics (heartbeat) to the editor service, then re-schedules itself every 5 minutes.
func SpiderInfoSend() {
time.Sleep(15 * time.Second)
list := []interface{}{}
Allspiders.Range(func(key, value interface{}) bool {
v := value.(*Spider)
info := map[string]interface{}{}
info["code"] = v.Code
info["todayDowncount"] = v.TodayDowncount
info["toDayRequestNum"] = v.ToDayRequestNum
info["yesterdayDowncount"] = v.YesterdayDowncount
info["yestoDayRequestNum"] = v.YestoDayRequestNum
info["totalDowncount"] = v.TotalDowncount
info["totalRequestNum"] = v.TotalRequestNum
info["errorNum"] = v.ErrorNum
info["roundCount"] = v.RoundCount
info["runRate"] = v.SpiderRunRate
info["lastHeartbeat"] = v.LastHeartbeat
info["lastDowncount"] = v.LastDowncount
info["lstate"] = v.L.Status(v.L)
list = append(list, info)
return true
})
bs, _ := json.Marshal(list)
value := url.Values{
"data": []string{util.Se.EncodeString(string(bs))},
"type": []string{"info"},
}
res, err := http.PostForm(util.Config.Editoraddr, value)
if err != nil {
logger.Error("send to editor: ", err.Error())
} else if res != nil {
res.Body.Close() // avoid leaking the response body
}
util.TimeAfterFunc(5*time.Minute, SpiderInfoSend, TimeChan)
}
// SaveHeartInfo flushes heartbeat data to the update cache, then re-schedules itself every 20 minutes.
func SaveHeartInfo() {
time.Sleep(30 * time.Second)
num := 0
SpiderHeart.Range(func(key, value interface{}) bool {
code := key.(string)
heart, ok := value.(*Heart)
if ok {
num++
update := []map[string]interface{}{}
update = append(update, map[string]interface{}{"code": code})
update = append(update, map[string]interface{}{"$set": map[string]interface{}{
"site": heart.Site,
"channel": heart.Channel,
"list": heart.ListHeart,
"findlist": heart.FindListHeart,
"detail": heart.DetailHeart,
"detailexecute": heart.DetailExecuteHeart,
"modifyuser": heart.ModifyUser,
"event": util.Config.Uploadevent,
"updatetime": time.Now().Unix(),
"del": false,
}})
UpdataHeartCache <- update
}
return true
})
logger.Info("更新心跳个数:", num)
time.AfterFunc(20*time.Minute, SaveHeartInfo)
}
// SpiderCodeSendToEditor notifies the editor that a spider code moved; it retries up to 3 times and logs the result.
func SpiderCodeSendToEditor(code string) {
defer qu.Catch()
ok := false
for i := 1; i <= 3; i++ {
logger.Info("Code:", code, " times:", i, " Send Move Event")
list := []interface{}{}
list = append(list, code)
bs, _ := json.Marshal(list)
value := url.Values{
"data": []string{util.Se.EncodeString(string(bs))},
"type": []string{"code"},
}
res, err := http.PostForm(util.Config.Editoraddr, value)
if err != nil {
logger.Error("Send To Editor For Move Event Failed,Code:", code)
} else {
if res != nil {
res.Body.Close()
}
ok = true
break
}
}
logger.Info("Code:", code, " Send Move Event:", ok)
mgu.Save("luamovelog", "editor", "editor", map[string]interface{}{
"code": code,
"comeintime": time.Now().Unix(),
"type": "sendfail",
"ok": ok,
})
}