// handler.go
package spider

import (
	"bufio"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"sync"
	"time"

	"github.com/cjoudrey/gluahttp"
	"github.com/donnie4w/go-logger/logger"
	lujson "github.com/yuin/gopher-json"
	"github.com/yuin/gopher-lua"

	qu "qfw/util"
	util "spiderutil"
)
  22. var SpiderHeart sync.Map = sync.Map{} //爬虫心跳
  23. var Allspiders sync.Map = sync.Map{}
  24. var Allspiders2 sync.Map = sync.Map{}
  25. var LoopListPath sync.Map = sync.Map{}
  26. //var ChanDels = map[int]string{}
  27. //var lock sync.Mutex
  28. var CC chan *lua.LState
  29. var CC2 chan *lua.LState
  30. var Chansize int
  31. var regcode, _ = regexp.Compile(`="(.*)"`)
  32. var InitCount int
  33. var InitAllLuaOver = make(chan bool, 1) //所有脚本是否加载完毕
  34. func InitSpider() {
  35. scriptMap := getSpiderScriptDB("all") //加载爬虫,初始化模板
  36. scriptMapFile := getSpiderScriptFile(false)
  37. for code, v := range scriptMap {
  38. LoopListPath.Store(code, v)
  39. InitCount++
  40. }
  41. for code, v := range scriptMapFile {
  42. LoopListPath.Store(code, v)
  43. InitCount++
  44. }
  45. if util.Config.Working == 0 {
  46. NoQueueScript() //高性能模式
  47. } else {
  48. if util.Config.Modal == 0 { //原始模式
  49. QueueUpScriptList()
  50. } else { //列表页和三级页分开采集
  51. go QueueUpScriptList() //节能模式列表页
  52. go QueueUpScriptDetail() //节能模式三级页
  53. }
  54. }
  55. }
  56. //高性能模式
  57. func NoQueueScript() {
  58. list, _ := MgoS.Find("spider_ldtime", nil, nil, map[string]interface{}{"code": 1, "uplimit": 1, "lowlimit": 1}, false, -1, -1)
  59. LoopListPath.Range(func(key, temp interface{}) bool {
  60. if info, ok := temp.(map[string]string); ok {
  61. code := info["code"]
  62. script := info["script"]
  63. sp, errstr := CreateSpider(code, script, true, false)
  64. if errstr == "" && sp != nil && sp.Code != "nil" { //脚本加载成功
  65. //sp.Index = qu.IntAll(key)
  66. //sp2.Index = qu.IntAll(key)
  67. if info["createuser"] != "" {
  68. sp.UserName = info["createuser"]
  69. }
  70. if info["createuseremail"] != "" {
  71. sp.UserEmail = info["createuseremail"]
  72. }
  73. sp.MUserName = info["modifyuser"]
  74. sp.MUserEmail = info["modifyemail"]
  75. Allspiders.Store(sp.Code, sp)
  76. for _, tmp := range *list {
  77. if qu.ObjToString(tmp["code"]) == sp.Code {
  78. sp.UpperLimit = qu.IntAll(tmp["uplimit"])
  79. //sp2.UpperLimit = qu.IntAll(tmp["uplimit"])
  80. sp.LowerLimit = qu.IntAll(tmp["lowlimit"])
  81. //sp2.LowerLimit = qu.IntAll(tmp["lowlimit"])
  82. break
  83. }
  84. }
  85. if util.Config.Modal == 1 { //列表页、三级页分开采集模式
  86. sp2, _ := CreateSpider(code, script, true, false)
  87. sp2.UserName = sp.UserName
  88. sp2.UserEmail = sp.UserEmail
  89. sp2.MUserName = sp.MUserName
  90. sp2.MUserEmail = sp.MUserEmail
  91. sp2.IsMainThread = true //多线程采集时使用
  92. Allspiders2.Store(sp.Code, sp2)
  93. }
  94. sp.StartJob()
  95. //util.TimeSleepFunc(10*time.Millisecond, TimeSleepChan)
  96. } else {
  97. logger.Info(code, "脚本加载失败,请检查!")
  98. nowT := time.Now().Unix()
  99. username := "异常"
  100. if sp != nil {
  101. username = sp.MUserName
  102. }
  103. MgoS.Update("spider_loadfail",
  104. map[string]interface{}{
  105. "code": code,
  106. "modifytime": map[string]interface{}{
  107. "$gte": nowT - 12*3600,
  108. "$lte": nowT + 12*3600,
  109. },
  110. },
  111. map[string]interface{}{
  112. "$set": map[string]interface{}{
  113. "code": code,
  114. "type": "初始化",
  115. "script": script,
  116. "updatetime": nowT,
  117. "modifyuser": username,
  118. "event": util.Config.Uploadevent,
  119. "err": errstr,
  120. },
  121. }, true, false)
  122. }
  123. time.Sleep(100 * time.Millisecond)
  124. }
  125. return true
  126. })
  127. InitAllLuaOver <- true //爬虫初始化完毕
  128. logger.Info("高性能模式:LUA加载完成")
  129. numSpider := 0
  130. Allspiders.Range(func(key, value interface{}) bool {
  131. numSpider++
  132. return true
  133. })
  134. logger.Info("总共加载脚本数:", numSpider)
  135. }
// QueueUpScriptList runs the energy-saving (queue) mode list-page loop: it
// pre-fills the CC pool with reusable Lua states, then forever scans
// LoopListPath, (re)creating and starting any spider that is not currently
// downloading its list pages. History-mending spiders are removed from the
// queue after one run. This function never returns.
func QueueUpScriptList() {
	logger.Info("节能模式列表页")
	CC = make(chan *lua.LState, util.Config.Chansize)
	for i := 0; i < util.Config.Chansize; i++ { // pre-fill the shared Lua-state pool
		CC <- lua.NewState(lua.Options{
			RegistrySize:        256 * 20,
			CallStackSize:       256,
			IncludeGoStackTrace: false,
		})
	}
	for { // endless polling loop
		listLen, listNoLen, DelLen := 0, 0, 0
		logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环", "初始化脚本数量:", InitCount)
		LoopListPath.Range(func(key, temp interface{}) bool {
			if info, ok := temp.(map[string]string); ok {
				code := info["code"]
				old_is_running := false
				// A spider already in Allspiders with Stop==false is still working.
				tmp, b := Allspiders.Load(code)
				if b {
					if sp_old, ok := tmp.(*Spider); ok {
						if !sp_old.Stop {
							old_is_running = true
						}
					}
				}
				logger.Info("Code:", code, "Is Downloading List:", old_is_running)
				if !old_is_running { // only (re)start spiders that are idle
					script := info["script"]
					sp, errstr := CreateSpider(code, script, false, false)
					if errstr == "" && sp != nil && sp.Code != "nil" { // script compiled successfully
						sp.UserName = info["createuser"]
						sp.UserEmail = info["createuseremail"]
						sp.MUserName = info["modifyuser"]
						sp.MUserEmail = info["modifyemail"]
						Allspiders.Store(code, sp)
						sp.StartJob()
					} else { // record the load failure (deduplicated within a 24h window)
						nowT := time.Now().Unix()
						username := "异常"
						if sp != nil {
							username = sp.MUserName
						}
						MgoS.Update("spider_loadfail",
							map[string]interface{}{
								"code": code,
								"modifytime": map[string]interface{}{
									"$gte": nowT - 12*3600,
									"$lte": nowT + 12*3600,
								},
							},
							map[string]interface{}{
								"$set": map[string]interface{}{
									"code":       code,
									"type":       "初始化",
									"script":     script,
									"updatetime": nowT,
									"modifyuser": username,
									"event":      util.Config.Uploadevent,
									"err":        errstr,
								},
							}, true, false)
					}
					if sp != nil && sp.IsHistoricalMend { // history-mending spiders run once, then are removed
						DelLen++
						LoopListPath.Delete(key)
						b = MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"state": 6}}, false, false)
						logger.Debug("Delete History Code:", code, b)
					}
				}
				listLen++
			} else {
				logger.Info("Code:", key, "Is Not Download List")
				listNoLen++
			}
			time.Sleep(100 * time.Millisecond) // throttle per-spider checks
			return true
		})
		time.Sleep(1 * time.Second)
		// Recount the queue and refresh InitCount.
		// NOTE(review): InitCount is also written by QueueUpScriptDetail from a
		// different goroutine without synchronization — looks like a data race;
		// confirm and consider sync/atomic.
		count_ok, count_no := 0, 0
		LoopListPath.Range(func(k, v interface{}) bool {
			if v != nil {
				count_ok++
			} else {
				count_no++
			}
			return true
		})
		InitCount = count_ok
		logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环,列表长度,", listLen, listNoLen, "删除数量", DelLen, "执行完毕后数量统计:", count_ok, count_no)
	}
}
// QueueUpScriptDetail runs the energy-saving (queue) mode detail-page loop:
// it pre-fills the CC2 pool with reusable Lua states, then forever scans
// LoopListPath and, for each idle spider, creates a detail-page spider in
// Allspiders2 and launches DownloadListDetail. This function never returns.
func QueueUpScriptDetail() {
	logger.Info("节能模式三级页")
	chanSize := util.Config.DetailChansize
	CC2 = make(chan *lua.LState, chanSize)
	for i := 0; i < chanSize; i++ { // pre-fill the shared Lua-state pool
		CC2 <- lua.NewState(lua.Options{
			RegistrySize:        256 * 20,
			CallStackSize:       256,
			IncludeGoStackTrace: false,
		})
	}
	for { // endless polling loop
		count_ok, count_no := 0, 0
		logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环", "初始化脚本数量:", InitCount)
		LoopListPath.Range(func(key, temp interface{}) bool {
			if info, ok := temp.(map[string]string); ok {
				count_ok++
				code := info["code"]
				old_is_running := false
				// A spider already in Allspiders2 with Stop==false is still working.
				tmp, b := Allspiders2.Load(code)
				if b {
					if sp_old, ok := tmp.(*Spider); ok {
						if !sp_old.Stop {
							old_is_running = true
						}
					}
				}
				logger.Info("Code:", code, "Is Downloading Detail:", old_is_running)
				if !old_is_running { // only (re)start spiders that are idle
					script := info["script"]
					sp, errstr := CreateSpider(code, script, true, false)
					if errstr == "" && sp != nil && sp.Code != "nil" { // script compiled successfully
						sp.UserName = info["createuser"]
						sp.UserEmail = info["createuseremail"]
						sp.MUserName = info["modifyuser"]
						sp.MUserEmail = info["modifyemail"]
						sp.IsMainThread = true
						Allspiders2.Store(code, sp)
						go sp.DownloadListDetail(false) // download detail-page data
					}
				}
			} else {
				logger.Info("Code:", key, "Is Not Download Detail")
				count_no++
			}
			time.Sleep(100 * time.Millisecond) // throttle per-spider checks
			return true
		})
		// NOTE(review): InitCount is also written by QueueUpScriptList from a
		// different goroutine without synchronization — looks like a data race;
		// confirm and consider sync/atomic.
		InitCount = count_ok
		time.Sleep(1 * time.Second)
		logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环完毕,数量统计:", count_ok, count_no)
	}
}
  285. //获取所有爬虫脚本--数据库
  286. func getSpiderScriptDB(code string) map[string]map[string]string {
  287. scriptSpider := map[string]map[string]string{}
  288. query := map[string]interface{}{}
  289. if code == "all" { //初始化所有脚本
  290. query = map[string]interface{}{"state": 5, "event": util.Config.Uploadevent}
  291. } else { //消息在线上传
  292. query = map[string]interface{}{"code": code, "event": util.Config.Uploadevent}
  293. //query = `{"$or":[{"iupload":1},{"iupload":3}],"event":` + fmt.Sprint(util.Config.Uploadevent) + `,"modifytime":{"$gt":1502937042}}`
  294. }
  295. listdb, _ := MgoEB.Find("luaconfig", query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
  296. //临时历史附件
  297. //listdb, _ := MgoEB.Find("luaconfig_test", query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
  298. for _, v := range *listdb {
  299. old := qu.IntAll(v["old_lua"])
  300. script := ""
  301. if old == 1 {
  302. script = fmt.Sprint(v["luacontent"])
  303. } else {
  304. if v["oldlua"] != nil {
  305. if v["luacontent"] != nil {
  306. script = v["luacontent"].(string)
  307. }
  308. } else {
  309. script = GetScriptByTmp(v)
  310. }
  311. }
  312. scriptSpider[fmt.Sprint(v["code"])] = map[string]string{
  313. "code": fmt.Sprint(v["code"]),
  314. "type": fmt.Sprint(v["state"]),
  315. "script": script,
  316. "createuser": fmt.Sprint(v["createuser"]),
  317. "createuseremail": fmt.Sprint(v["createuseremail"]),
  318. "modifyuser": fmt.Sprint(v["modifyuser"]),
  319. "modifyemail": fmt.Sprint(v["next"]),
  320. }
  321. }
  322. return scriptSpider
  323. }
  324. //获取所有爬虫脚本--文件
  325. func getSpiderScriptFile(newscript bool) map[string]map[string]string {
  326. scriptSpider := map[string]map[string]string{}
  327. filespider := 0
  328. filepath.Walk("res", func(path string, info os.FileInfo, err error) error {
  329. if info.IsDir() {
  330. return nil
  331. } else if strings.HasPrefix(info.Name(), "spider_") &&
  332. strings.HasSuffix(info.Name(), ".lua") {
  333. //过滤test目录
  334. if strings.Contains(path, "\\test\\") {
  335. return nil
  336. }
  337. loadfile := true
  338. if newscript {
  339. if time.Now().Unix() < info.ModTime().Add(time.Duration(15)*time.Minute).Unix() {
  340. loadfile = true
  341. } else {
  342. loadfile = false
  343. }
  344. }
  345. if loadfile {
  346. f, err := os.Open(path)
  347. defer f.Close()
  348. if err != nil {
  349. logger.Error(err.Error())
  350. }
  351. buf := bufio.NewReader(f)
  352. script := ""
  353. code := ""
  354. for {
  355. line, err := buf.ReadString('\n')
  356. if code == "" && strings.Contains(line, "spiderCode=") {
  357. res := regcode.FindAllStringSubmatch(line, -1)
  358. if len(res) > 0 {
  359. code = res[0][1]
  360. //logger.Info("code", code)
  361. } else {
  362. break
  363. }
  364. }
  365. if scriptSpider[code] == nil {
  366. script = script + line + "\n"
  367. } else {
  368. break
  369. }
  370. if err != nil {
  371. break
  372. }
  373. }
  374. if code != "" && script != "" && scriptSpider[code] == nil {
  375. scriptSpider[code] = map[string]string{
  376. "code": code,
  377. "type": "5",
  378. "script": script,
  379. //脚本文件属性值空
  380. "createuser": "",
  381. "createuseremail": "",
  382. "modifyuser": "",
  383. "modifyemail": "",
  384. }
  385. filespider = filespider + 1
  386. //logger.Info("script", script)
  387. }
  388. }
  389. }
  390. return nil
  391. })
  392. logger.Info("节点", util.Config.Uploadevent, "脚本文件爬虫数", filespider)
  393. return scriptSpider
  394. }
  395. //脚本下架、上架、重载
  396. func UpdateSpiderByCodeState(code, state string) (bool, error) {
  397. up := false
  398. var err error
  399. if state != "5" && state != "-1" { //脚本下架
  400. SpiderHeart.Delete(code) //脚本下架,删除脚本对应心跳
  401. logger.Info("下架脚本", code)
  402. if util.Config.Working == 1 { //队列模式
  403. for i, as := range []sync.Map{Allspiders, Allspiders2} {
  404. if i == 1 && util.Config.Modal == 0 { //队列模式原始模式采集Allspiders2无用(7700下架爬虫)
  405. break
  406. }
  407. tmp, b := as.Load(code)
  408. if b {
  409. sp, ok := tmp.(*Spider)
  410. if ok {
  411. if !sp.Stop { //脚本未执行
  412. sp.Stop = true
  413. }
  414. }
  415. as.Delete(code)
  416. logger.Info("下架脚本,Allspiders删除")
  417. }
  418. }
  419. //LoopListPath.Range(func(k, v interface{}) bool {
  420. // //if v != nil {
  421. // // info, _ := v.(map[string]string)
  422. // // if info["code"] == code {
  423. // // LoopListPath.Store(k, nil)
  424. // // lock.Lock()
  425. // // defer lock.Unlock()
  426. // // ChanDels[qu.IntAll(k)] = code
  427. // // logger.Info("下架脚本,LoopListPath更新为nil,ChanDels中位置:", k)
  428. // // }
  429. // //}
  430. // if k == code {
  431. // LoopListPath.Delete(k)
  432. // logger.Info(code, "脚本下架成功")
  433. // return false //跳出循环
  434. // }
  435. // return true
  436. //})
  437. } else { //高性能模式
  438. for _, as := range []sync.Map{Allspiders, Allspiders2} {
  439. if tmp, ok := as.Load(code); ok {
  440. sp, ok := tmp.(*Spider)
  441. if ok {
  442. sp.Stop = true
  443. sp.L.Close()
  444. as.Delete(code)
  445. }
  446. }
  447. }
  448. }
  449. LoopListPath.Delete(code)
  450. logger.Info(code, "脚本下架成功")
  451. up = true
  452. err = nil
  453. } else if state == "-1" { //爬虫重采更新线上爬虫
  454. scriptMap := getSpiderScriptDB(code)
  455. logger.Info("更新线上脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
  456. if util.Config.Working == 1 { //排队模式
  457. for _, v := range scriptMap {
  458. listsize := 0
  459. listHas := false
  460. count_ok, count_no := 0, 0
  461. LoopListPath.Range(func(key, val interface{}) bool {
  462. listsize++
  463. if tmp, ok := val.(map[string]string); ok {
  464. count_ok++
  465. if tmp["code"] == code && key == code { //队列存在,重载脚本
  466. logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
  467. listHas = true
  468. LoopListPath.Store(key, v)
  469. UpdateHighListDataByCode(code) //爬虫更新上架后,重置数据state=0
  470. logger.Info("队列模式更新列表页信息状态", code)
  471. }
  472. } else {
  473. count_no++
  474. }
  475. return true
  476. })
  477. logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
  478. if !listHas { //队列不存在
  479. logger.Info("重采更新爬虫失败:", code)
  480. up = false
  481. err = errors.New("爬虫不在线:" + code)
  482. } else {
  483. up = true
  484. err = nil
  485. logger.Info("重采更新爬虫成功", code)
  486. }
  487. }
  488. } else { //高性能模式
  489. for k, v := range scriptMap {
  490. if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
  491. sp := spd.(*Spider)
  492. sp.ScriptFile = v["script"]
  493. if v["createuser"] != "" {
  494. sp.UserName = v["createuser"]
  495. }
  496. if v["createuseremail"] != "" {
  497. sp.UserEmail = v["createuseremail"]
  498. }
  499. sp.MUserName = v["modifyuser"]
  500. sp.MUserEmail = v["modifyemail"]
  501. Allspiders.Store(k, sp)
  502. up = true
  503. err = nil
  504. logger.Info("重采更新爬虫成功", sp.Code)
  505. } else { //不存在
  506. up = false
  507. err = errors.New("爬虫不在线:" + code)
  508. logger.Info("重采更新爬虫失败:", code)
  509. }
  510. //Allspiders2
  511. if spd2, ok2 := Allspiders2.Load(k); ok2 { //对应脚本已存在,更新
  512. sp2 := spd2.(*Spider)
  513. sp2.ScriptFile = v["script"]
  514. if v["createuser"] != "" {
  515. sp2.UserName = v["createuser"]
  516. }
  517. if v["createuseremail"] != "" {
  518. sp2.UserEmail = v["createuseremail"]
  519. }
  520. sp2.MUserName = v["modifyuser"]
  521. sp2.MUserEmail = v["modifyemail"]
  522. sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) //更新上架,重载脚本
  523. Allspiders2.Store(k, sp2)
  524. // up = true
  525. // err = nil
  526. logger.Info("Allspiders2重采更新爬虫成功", sp2.Code)
  527. } else { //不存在
  528. // up = false
  529. // err = errors.New("爬虫不在线:" + code)
  530. logger.Info("Allspiders2重采更新爬虫失败:", code)
  531. }
  532. }
  533. }
  534. } else { //脚本上架
  535. scriptMap := getSpiderScriptDB(code)
  536. logger.Info("上架新增脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
  537. if util.Config.Working == 1 { //排队模式
  538. for _, v := range scriptMap {
  539. listsize := 0
  540. listHas := false
  541. count_ok, count_no := 0, 0
  542. LoopListPath.Range(func(key, val interface{}) bool {
  543. listsize++
  544. if tmp, ok := val.(map[string]string); ok { //此处判断仅仅为了得到count_ok的值,可直接判断key==code
  545. count_ok++
  546. if tmp["code"] == code && code == key { //队列存在,重载脚本
  547. logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
  548. listHas = true
  549. LoopListPath.Store(key, v)
  550. UpdateHighListDataByCode(code) //爬虫更新上架后,重置数据state=0
  551. logger.Info("队列模式更新列表页信息状态", code)
  552. }
  553. } else {
  554. count_no++
  555. }
  556. return true
  557. })
  558. logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
  559. if !listHas { //队列中不存在,新增
  560. logger.Info("上架新增脚本,队列中不存在")
  561. LoopListPath.Store(code, v) //上架
  562. // lock.Lock()
  563. // defer lock.Unlock()
  564. // if len(ChanDels) > 0 {
  565. // for i, _ := range ChanDels {
  566. // logger.Info("上架新增脚本,替补队列中位置", i)
  567. // LoopListPath.Store(i, v)
  568. // delete(ChanDels, i)
  569. // break
  570. // }
  571. // } else {
  572. // logger.Info("上架新增脚本,新增队列中位置", listsize)
  573. // LoopListPath.Store(listsize, v) //上架
  574. // }
  575. //校验是否上架成功
  576. saveList := false //记录是否上架成功
  577. listsize, count_ok, count_no = 0, 0, 0
  578. LoopListPath.Range(func(key, val interface{}) bool {
  579. listsize++
  580. if tmp, ok := val.(map[string]string); ok {
  581. count_ok++
  582. if tmp["code"] == code && key == code { //队列存在
  583. saveList = true
  584. logger.Info("上架脚本成功", code)
  585. }
  586. } else {
  587. count_no++
  588. }
  589. return true
  590. })
  591. logger.Info("上架爬虫后队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
  592. if !saveList { //上架失败
  593. logger.Info("上架脚本", code, " 失败")
  594. return false, errors.New("use " + code + " failed")
  595. }
  596. }
  597. logger.Info("上架新增脚本", code)
  598. up = true
  599. }
  600. } else { //高性能模式
  601. for k, v := range scriptMap {
  602. LoopListPath.Store(k, v)
  603. //1、Allspiders对应7000、7100、7400脚本上架下载数据(列表页爬虫集合)
  604. if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
  605. sp := spd.(*Spider)
  606. sp.ScriptFile = v["script"]
  607. sp.UserName = v["createuser"]
  608. sp.UserEmail = v["createuseremail"]
  609. sp.MUserName = v["modifyuser"]
  610. sp.MUserEmail = v["modifyemail"]
  611. UpdateSpider(sp, k, v["script"]) //爬虫其他信息更新
  612. //sp.LoadScript(&sp.Name, &sp.Channel, &sp.MUserName, k, sp.ScriptFile, true, false) //更新上架,重载脚本
  613. Allspiders.Store(k, sp)
  614. up = true
  615. err = nil
  616. logger.Info("上架重载脚本", sp.Code)
  617. } else { //新增脚本
  618. sp, errstr := CreateSpider(k, v["script"], true, false)
  619. if errstr == "" && sp != nil && sp.Code != "nil" {
  620. sp.UserName = v["createuser"]
  621. sp.UserEmail = v["createuseremail"]
  622. sp.MUserName = v["modifyuser"]
  623. sp.MUserEmail = v["modifyemail"]
  624. Allspiders.Store(k, sp)
  625. sp.StartJob()
  626. up = true
  627. err = nil
  628. logger.Info("上架新增脚本", sp.Code)
  629. } else {
  630. err = errors.New("新增失败")
  631. nowT := time.Now().Unix()
  632. MgoS.Update("spider_loadfail",
  633. map[string]interface{}{
  634. "code": k,
  635. "modifytime": map[string]interface{}{
  636. "$gte": nowT - 12*3600,
  637. "$lte": nowT + 12*3600,
  638. },
  639. },
  640. map[string]interface{}{
  641. "$set": map[string]interface{}{
  642. "code": k,
  643. "type": "新增初始化脚本",
  644. "script": v["script"],
  645. "updatetime": nowT,
  646. "modifyuser": sp.MUserName,
  647. "event": util.Config.Uploadevent,
  648. "err": errstr,
  649. },
  650. }, true, false)
  651. }
  652. }
  653. //2、Allspiders2对应7100、7110、7400上架采集三级页数据(Allspiders2三级页爬虫集合)
  654. if util.Config.Modal == 1 {
  655. //Allspiders2
  656. if spd2, ok2 := Allspiders2.Load(k); ok2 { //对应脚本已存在,更新
  657. sp2 := spd2.(*Spider)
  658. sp2.ScriptFile = v["script"]
  659. sp2.UserName = v["createuser"]
  660. sp2.UserEmail = v["createuseremail"]
  661. sp2.MUserName = v["modifyuser"]
  662. sp2.MUserEmail = v["modifyemail"]
  663. UpdateSpider(sp2, k, v["script"]) //爬虫其他信息更新
  664. sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) //更新上架,重载脚本
  665. Allspiders2.Store(k, sp2) //重载后放入集合
  666. UpdateHighListDataByCode(k) //爬虫更新上架后,重置数据state=0
  667. // up = true
  668. // err = nil
  669. logger.Info("Allspiders2上架重载脚本", sp2.Code)
  670. } else { //新增脚本
  671. sp2, errstr := CreateSpider(k, v["script"], true, false)
  672. if errstr == "" && sp2 != nil && sp2.Code != "nil" {
  673. sp2.UserName = v["createuser"]
  674. sp2.UserEmail = v["createuseremail"]
  675. sp2.MUserName = v["modifyuser"]
  676. sp2.MUserEmail = v["modifyemail"]
  677. sp2.IsMainThread = true //多线程采集时使用
  678. go sp2.DownloadHighDetail(true) //根据列表页数据下载三级页
  679. Allspiders2.Store(k, sp2)
  680. // up = true
  681. // err = nil
  682. logger.Info("Allspiders2上架新增脚本", sp2.Code)
  683. } /*else {
  684. err = errors.New("新增失败")
  685. mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
  686. "code": k,
  687. "type": "新增脚本失败",
  688. "script": v["script"],
  689. "intime": time.Now().Format(qu.Date_Full_Layout),
  690. "event": util.Config.Uploadevent,
  691. })
  692. }*/
  693. }
  694. }
  695. }
  696. }
  697. }
  698. logger.Info("上下架:", up, err)
  699. return up, err
  700. }
  701. //定时重载脚本文件
  702. func ReloadSpiderFile() {
  703. scriptMap := getSpiderScriptFile(true)
  704. for k, v := range scriptMap {
  705. for i, as := range []sync.Map{Allspiders, Allspiders2} {
  706. if i == 1 && util.Config.Modal == 0 { //队列模式原始模式采集Allspiders2无用
  707. continue
  708. }
  709. if spd, ok := as.Load(k); ok { //对应脚本已存在,更新
  710. sp := spd.(*Spider)
  711. logger.Info("定时重载脚本", sp.Code)
  712. sp.ScriptFile = v["script"]
  713. if v["createuser"] != "" {
  714. sp.UserName = v["createuser"]
  715. }
  716. if v["createuseremail"] != "" {
  717. sp.UserEmail = v["createuseremail"]
  718. }
  719. sp.MUserName = v["modifyuser"]
  720. sp.MUserEmail = v["modifyemail"]
  721. as.Store(k, sp)
  722. } else { //新增脚本
  723. var sp *Spider
  724. var errstr string
  725. if util.Config.Working == 1 { //排队模式
  726. if i == 0 {
  727. //length := 0
  728. //LoopListPath.Range(func(k, v interface{}) bool {
  729. // length++
  730. // return true
  731. //})
  732. LoopListPath.Store(k, v) //排队模式Allspiders,Allspiders2共用一个LoopListPath,新增一次即可
  733. sp, errstr = CreateSpider(k, v["script"], false, false)
  734. } else {
  735. sp, errstr = CreateSpider(k, v["script"], true, false)
  736. }
  737. } else {
  738. sp, errstr = CreateSpider(k, v["script"], true, false)
  739. }
  740. if errstr == "" && sp != nil && sp.Code != "nil" {
  741. if v["createuser"] != "" {
  742. sp.UserName = v["createuser"]
  743. }
  744. if v["createuseremail"] != "" {
  745. sp.UserEmail = v["createuseremail"]
  746. }
  747. sp.MUserName = v["modifyuser"]
  748. sp.MUserEmail = v["modifyemail"]
  749. as.Store(k, sp)
  750. if util.Config.Working == 1 {
  751. sp.Stop = true
  752. // if i == 0 {
  753. // length := 0
  754. // LoopListPath.Range(func(k, v interface{}) bool {
  755. // length++
  756. // return true
  757. // })
  758. // LoopListPath.Store(length, v)
  759. // }
  760. } else {
  761. sp.Stop = false
  762. if i == 0 { //高性能模式只有Allspiders启动爬虫,Allspiders2只负责下三级页
  763. sp.StartJob()
  764. }
  765. }
  766. logger.Info("定时重载脚本--新增", sp.Code)
  767. } else {
  768. if i == 0 {
  769. nowT := time.Now().Unix()
  770. MgoS.Update("spider_loadfail",
  771. map[string]interface{}{
  772. "code": k,
  773. "modifytime": map[string]interface{}{
  774. "$gte": nowT - 12*3600,
  775. "$lte": nowT + 12*3600,
  776. },
  777. },
  778. map[string]interface{}{
  779. "$set": map[string]interface{}{
  780. "code": k,
  781. "type": "定时重载--新增失败",
  782. "script": v["script"],
  783. "updatetime": nowT,
  784. "modifyuser": sp.MUserName,
  785. "event": util.Config.Uploadevent,
  786. "err": errstr,
  787. },
  788. }, true, false)
  789. }
  790. }
  791. }
  792. }
  793. // if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
  794. // sp := spd.(*Spider)
  795. // logger.Info("定时重载脚本", sp.Code)
  796. // sp.ScriptFile = v["script"]
  797. // if v["createuser"] != "" {
  798. // sp.UserName = v["createuser"]
  799. // }
  800. // if v["createuseremail"] != "" {
  801. // sp.UserEmail = v["createuseremail"]
  802. // }
  803. // sp.MUserName = v["modifyuser"]
  804. // sp.MUserEmail = v["modifyemail"]
  805. // Allspiders.Store(k, sp)
  806. // } else { //新增脚本
  807. // var sp *Spider
  808. // if util.Config.Working == 1 { //排队模式
  809. // length := 0
  810. // LoopListPath.Range(func(k, v interface{}) bool {
  811. // length++
  812. // return true
  813. // })
  814. // LoopListPath.Store(length, v)
  815. // sp = CreateSpider(k, v["script"], false,false)
  816. // } else {
  817. // sp = NewSpider(k, v["script"])
  818. // }
  819. // if sp != nil && sp.Code != "nil" {
  820. // if v["createuser"] != "" {
  821. // sp.UserName = v["createuser"]
  822. // }
  823. // if v["createuseremail"] != "" {
  824. // sp.UserEmail = v["createuseremail"]
  825. // }
  826. // sp.MUserName = v["modifyuser"]
  827. // sp.MUserEmail = v["modifyemail"]
  828. // Allspiders.Store(k, sp)
  829. // if util.Config.Working == 1 {
  830. // sp.Stop = true
  831. // length := 0
  832. // LoopListPath.Range(func(k, v interface{}) bool {
  833. // length++
  834. // return true
  835. // })
  836. // LoopListPath.Store(length, v)
  837. // } else {
  838. // sp.Stop = false
  839. // sp.StartJob()
  840. // }
  841. // logger.Info("定时重载脚本--新增", sp.Code)
  842. // } else {
  843. // mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
  844. // "code": k,
  845. // "type": "定时重载--新增失败",
  846. // "script": v["script"],
  847. // "intime": time.Now().Format(qu.Date_Full_Layout),
  848. // "event": util.Config.Uploadevent,
  849. // })
  850. // }
  851. // }
  852. }
  853. util.TimeAfterFunc(time.Duration(15)*time.Minute, ReloadSpiderFile, TimeChan)
  854. }
  855. //生成爬虫
  856. func CreateSpider(code, luafile string, newstate, thread bool) (*Spider, string) {
  857. defer qu.Catch()
  858. spider := &Spider{}
  859. err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate, thread)
  860. if err != "" {
  861. return nil, err
  862. }
  863. spider.Code = spider.GetVar("spiderCode")
  864. spider.SCode = spider.Code
  865. spider.Name = spider.GetVar("spiderName")
  866. spider.Channel = spider.GetVar("spiderChannel")
  867. //spider.LastExecTime = GetLastExectime(spider.Code)
  868. spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
  869. spider.Collection = spider.GetVar("spider2Collection")
  870. spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
  871. //spider.Thread = int64(spider.GetIntVar("spiderThread"))
  872. spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
  873. spider.StoreMode = spider.GetIntVar("spiderStoreMode")
  874. spider.CoverAttr = spider.GetVar("spiderCoverAttr")
  875. spiderSleepBase := spider.GetIntVar("spiderSleepBase")
  876. if spiderSleepBase == -1 {
  877. spider.SleepBase = 1000
  878. } else {
  879. spider.SleepBase = spiderSleepBase
  880. }
  881. spiderSleepRand := spider.GetIntVar("spiderSleepRand")
  882. if spiderSleepRand == -1 {
  883. spider.SleepRand = 1000
  884. } else {
  885. spider.SleepRand = spiderSleepRand
  886. }
  887. spiderTimeout := spider.GetIntVar("spiderTimeout")
  888. if spiderTimeout == -1 {
  889. spider.Timeout = 60
  890. } else {
  891. spider.Timeout = int64(spiderTimeout)
  892. }
  893. spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
  894. spider.UserName = spider.GetVar("spiderUserName")
  895. spider.UserEmail = spider.GetVar("spiderUserEmail")
  896. spider.UploadTime = spider.GetVar("spiderUploadTime")
  897. //新增历史补漏
  898. //qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
  899. spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
  900. spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
  901. //新老爬虫
  902. spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
  903. //爬虫类型
  904. spider.Infoformat = spider.GetIntVar("spiderInfoformat")
  905. return spider, ""
  906. }
  907. //更新爬虫
  908. func UpdateSpider(spider *Spider, code, script string) {
  909. ts := &Spider{}
  910. ts.Script.L = lua.NewState(lua.Options{
  911. RegistrySize: 256 * 20,
  912. CallStackSize: 256,
  913. IncludeGoStackTrace: false,
  914. })
  915. defer ts.L.Close()
  916. ts.Script.L.PreloadModule("http", gluahttp.NewHttpModule(&http.Client{}).Loader)
  917. ts.Script.L.PreloadModule("json", lujson.Loader)
  918. if err := ts.Script.L.DoString(script); err != nil {
  919. logger.Debug(code + ",加载lua脚本错误:" + err.Error())
  920. return
  921. }
  922. spider.Channel = ts.GetVar("spiderChannel") //栏目名称
  923. spider.DownDetail = ts.GetBoolVar("spiderDownDetailPage") //是否下三级页
  924. spider.Collection = ts.GetVar("spider2Collection") //存储表
  925. spider.SpiderRunRate = int64(ts.GetIntVar("spiderRunRate")) //间隔时间
  926. spider.StoreToMsgEvent = ts.GetIntVar("spiderStoreToMsgEvent") //4002
  927. spider.StoreMode = ts.GetIntVar("spiderStoreMode") //2
  928. spider.CoverAttr = ts.GetVar("spiderCoverAttr") //title
  929. //下载三级页(DownloadDetailPage)随机延迟
  930. spiderSleepBase := ts.GetIntVar("spiderSleepBase")
  931. if spiderSleepBase == -1 {
  932. spider.SleepBase = 1000
  933. } else {
  934. spider.SleepBase = spiderSleepBase
  935. }
  936. spiderSleepRand := ts.GetIntVar("spiderSleepRand")
  937. if spiderSleepRand == -1 {
  938. spider.SleepRand = 1000
  939. } else {
  940. spider.SleepRand = spiderSleepRand
  941. }
  942. spiderTimeout := ts.GetIntVar("spiderTimeout")
  943. if spiderTimeout == -1 {
  944. spider.Timeout = 60
  945. } else {
  946. spider.Timeout = int64(spiderTimeout)
  947. }
  948. spider.TargetChannelUrl = ts.GetVar("spiderTargetChannelUrl") //栏目地址
  949. //新增历史补漏
  950. spider.IsHistoricalMend = ts.GetBoolVar("spiderIsHistoricalMend")
  951. spider.IsMustDownload = ts.GetBoolVar("spiderIsMustDownload")
  952. //新老爬虫
  953. spider.IsCompete = ts.GetBoolVar("spiderIsCompete")
  954. //爬虫类型
  955. spider.Infoformat = spider.GetIntVar("spiderInfoformat")
  956. }
  957. //排队模式生成爬虫
  958. func NewSpider_New(code, luafile string, newstate bool) (*Spider, string) {
  959. defer qu.Catch()
  960. spider := &Spider{}
  961. err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate, false)
  962. if err != "" {
  963. return nil, err
  964. }
  965. spider.Code = spider.GetVar("spiderCode")
  966. spider.Script.SCode = spider.Code
  967. spider.Name = spider.GetVar("spiderName")
  968. spider.Channel = spider.GetVar("spiderChannel")
  969. //spider.LastExecTime = GetLastExectime(spider.Code)
  970. spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
  971. spider.Collection = spider.GetVar("spider2Collection")
  972. spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
  973. spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
  974. spider.StoreMode = spider.GetIntVar("spiderStoreMode")
  975. spider.CoverAttr = spider.GetVar("spiderCoverAttr")
  976. spiderSleepBase := spider.GetIntVar("spiderSleepBase")
  977. if spiderSleepBase == -1 {
  978. spider.SleepBase = 1000
  979. } else {
  980. spider.SleepBase = spiderSleepBase
  981. }
  982. spiderSleepRand := spider.GetIntVar("spiderSleepRand")
  983. if spiderSleepRand == -1 {
  984. spider.SleepRand = 1000
  985. } else {
  986. spider.SleepRand = spiderSleepRand
  987. }
  988. spiderTimeout := spider.GetIntVar("spiderTimeout")
  989. if spiderTimeout == -1 {
  990. spider.Timeout = 60
  991. } else {
  992. spider.Timeout = int64(spiderTimeout)
  993. }
  994. spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
  995. if v, ok := Allspiders.Load(spider.Code); ok {
  996. sp := v.(*Spider)
  997. spider.TodayDowncount = sp.TodayDowncount
  998. spider.ToDayRequestNum = sp.ToDayRequestNum
  999. spider.YesterdayDowncount = sp.YesterdayDowncount
  1000. spider.YestoDayRequestNum = sp.YestoDayRequestNum
  1001. spider.TotalDowncount = sp.TotalDowncount
  1002. spider.TotalRequestNum = sp.TotalRequestNum
  1003. spider.ErrorNum = sp.ErrorNum
  1004. spider.RoundCount = sp.RoundCount
  1005. }
  1006. spider.UserName = spider.GetVar("spiderUserName")
  1007. spider.UserEmail = spider.GetVar("spiderUserEmail")
  1008. spider.UploadTime = spider.GetVar("spiderUploadTime")
  1009. //新增历史补漏
  1010. spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
  1011. spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
  1012. //新老爬虫
  1013. spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
  1014. //爬虫类型
  1015. spider.Infoformat = spider.GetIntVar("spiderInfoformat")
  1016. return spider, ""
  1017. }
  1018. //高性能模式生成爬虫
  1019. func NewSpider(code, luafile string) (*Spider, string) {
  1020. defer qu.Catch()
  1021. spider := &Spider{}
  1022. err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, false)
  1023. if err != "" {
  1024. return nil, err
  1025. }
  1026. spider.Code = spider.GetVar("spiderCode")
  1027. spider.SCode = spider.Code
  1028. spider.Name = spider.GetVar("spiderName")
  1029. spider.Channel = spider.GetVar("spiderChannel")
  1030. //spider.LastExecTime = GetLastExectime(spider.Code)
  1031. spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
  1032. spider.Collection = spider.GetVar("spider2Collection")
  1033. spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
  1034. //spider.Thread = int64(spider.GetIntVar("spiderThread"))
  1035. spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
  1036. spider.StoreMode = spider.GetIntVar("spiderStoreMode")
  1037. spider.CoverAttr = spider.GetVar("spiderCoverAttr")
  1038. spiderSleepBase := spider.GetIntVar("spiderSleepBase")
  1039. if spiderSleepBase == -1 {
  1040. spider.SleepBase = 1000
  1041. } else {
  1042. spider.SleepBase = spiderSleepBase
  1043. }
  1044. spiderSleepRand := spider.GetIntVar("spiderSleepRand")
  1045. if spiderSleepRand == -1 {
  1046. spider.SleepRand = 1000
  1047. } else {
  1048. spider.SleepRand = spiderSleepRand
  1049. }
  1050. spiderTimeout := spider.GetIntVar("spiderTimeout")
  1051. if spiderTimeout == -1 {
  1052. spider.Timeout = 60
  1053. } else {
  1054. spider.Timeout = int64(spiderTimeout)
  1055. }
  1056. spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
  1057. date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
  1058. tmp := GetDownloadLast(spider.Code, date) //
  1059. if len(tmp) > 0 {
  1060. spider.TodayDowncount = int32(qu.IntAll(tmp["todaydowncount"]))
  1061. spider.ToDayRequestNum = int32(qu.IntAll(tmp["todaydownreq"]))
  1062. spider.YesterdayDowncount = int32(qu.IntAll(tmp["yesdowncount"]))
  1063. spider.YestoDayRequestNum = int32(qu.IntAll(tmp["yesdownreq"]))
  1064. spider.TotalDowncount = spider.TodayDowncount + int32(qu.IntAll(tmp["totaldown"]))
  1065. spider.TotalRequestNum = spider.ToDayRequestNum + int32(qu.IntAll(tmp["totalreq"]))
  1066. }
  1067. spider.UserName = spider.GetVar("spiderUserName")
  1068. spider.UserEmail = spider.GetVar("spiderUserEmail")
  1069. spider.UploadTime = spider.GetVar("spiderUploadTime")
  1070. //新增历史补漏
  1071. //qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
  1072. spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
  1073. spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
  1074. //新老爬虫
  1075. spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
  1076. //爬虫类型
  1077. spider.Infoformat = spider.GetIntVar("spiderInfoformat")
  1078. return spider, ""
  1079. }
  1080. //多线程生成爬虫
  1081. func NewSpiderForThread(code, luafile string) (*Spider, string) {
  1082. defer qu.Catch()
  1083. spider := &Spider{}
  1084. err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, true)
  1085. if err != "" {
  1086. return nil, err
  1087. }
  1088. spider.Code = spider.GetVar("spiderCode")
  1089. spider.SCode = spider.Code
  1090. spider.Script.SCode = spider.Code
  1091. spider.Name = spider.GetVar("spiderName")
  1092. spider.Channel = spider.GetVar("spiderChannel")
  1093. //spider.LastExecTime = GetLastExectime(spider.Code)
  1094. spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
  1095. spider.Collection = spider.GetVar("spider2Collection")
  1096. spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
  1097. //spider.Thread = int64(spider.GetIntVar("spiderThread"))
  1098. spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
  1099. spider.StoreMode = spider.GetIntVar("spiderStoreMode")
  1100. spider.CoverAttr = spider.GetVar("spiderCoverAttr")
  1101. spiderSleepBase := spider.GetIntVar("spiderSleepBase")
  1102. if spiderSleepBase == -1 {
  1103. spider.SleepBase = 1000
  1104. } else {
  1105. spider.SleepBase = spiderSleepBase
  1106. }
  1107. spiderSleepRand := spider.GetIntVar("spiderSleepRand")
  1108. if spiderSleepRand == -1 {
  1109. spider.SleepRand = 1000
  1110. } else {
  1111. spider.SleepRand = spiderSleepRand
  1112. }
  1113. spiderTimeout := spider.GetIntVar("spiderTimeout")
  1114. if spiderTimeout == -1 {
  1115. spider.Timeout = 60
  1116. } else {
  1117. spider.Timeout = int64(spiderTimeout)
  1118. }
  1119. spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
  1120. spider.UserName = spider.GetVar("spiderUserName")
  1121. spider.UserEmail = spider.GetVar("spiderUserEmail")
  1122. spider.UploadTime = spider.GetVar("spiderUploadTime")
  1123. //新增历史补漏
  1124. //qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
  1125. spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
  1126. spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
  1127. //新老爬虫
  1128. spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
  1129. //爬虫类型
  1130. spider.Infoformat = spider.GetIntVar("spiderInfoformat")
  1131. return spider, ""
  1132. }
  1133. //下载量入库
  1134. func SaveDownCount(code string, addtotal bool, todayDowncount, todayRequestNum, yesterdayDowncount, yestoDayRequestNum int32) {
  1135. date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
  1136. updata := map[string]interface{}{}
  1137. if addtotal {
  1138. updata = map[string]interface{}{
  1139. "$inc": map[string]interface{}{"totaldown": todayDowncount, "totalreq": todayRequestNum},
  1140. "$set": map[string]interface{}{
  1141. "yesdowncount": yesterdayDowncount,
  1142. "yesdownreq": yestoDayRequestNum,
  1143. "todaydowncount": todayDowncount,
  1144. "todaydownreq": todayRequestNum,
  1145. "date": date,
  1146. "year": time.Now().Year(),
  1147. "month": time.Now().Month(),
  1148. "day": time.Now().Day(),
  1149. },
  1150. }
  1151. } else {
  1152. updata = map[string]interface{}{
  1153. "$set": map[string]interface{}{
  1154. "yesdowncount": yesterdayDowncount,
  1155. "yesdownreq": yestoDayRequestNum,
  1156. "todaydowncount": todayDowncount,
  1157. "todaydownreq": todayRequestNum,
  1158. "date": date,
  1159. "year": time.Now().Year(),
  1160. "month": time.Now().Month(),
  1161. "day": time.Now().Day(),
  1162. },
  1163. }
  1164. }
  1165. MgoS.Update("spider_downlog", map[string]interface{}{"code": code, "date": date}, updata, true, false)
  1166. }
  1167. //获取下载的上下限(没用)
  1168. func GetLimitDownload(code string) (uplimit, lowlimit int) {
  1169. defer qu.Catch()
  1170. ret, _ := MgoS.FindOne("spider_ldtime", map[string]interface{}{"code": code})
  1171. if ret != nil && len(*ret) > 0 {
  1172. uplimit = qu.IntAll((*ret)["uplimit"])
  1173. lowlimit = qu.IntAll((*ret)["lowlimit"])
  1174. return uplimit, lowlimit
  1175. } else {
  1176. return 100, 0
  1177. }
  1178. }
  1179. //拼装脚本
  1180. func GetScriptByTmp(luaconfig map[string]interface{}) string {
  1181. defer qu.Catch()
  1182. script := ""
  1183. if luaconfig["listcheck"] == nil {
  1184. luaconfig["listcheck"] = ""
  1185. }
  1186. if luaconfig["contentcheck"] == nil {
  1187. luaconfig["contentcheck"] = ""
  1188. }
  1189. if luaconfig != nil && len(luaconfig) > 0 {
  1190. common := luaconfig["param_common"].([]interface{})
  1191. //新增spiderIsHistoricalMend spiderIsMustDownload
  1192. if len(common) == 15 {
  1193. common = append(common, "", "", "")
  1194. } else {
  1195. common = append(common, false, false, "", "", "")
  1196. }
  1197. for k, v := range common {
  1198. if k == 4 || k == 5 || k == 6 || k == 9 || k == 10 {
  1199. common[k] = qu.IntAll(v)
  1200. }
  1201. }
  1202. script, _ = GetTmpModel(map[string][]interface{}{"common": common})
  1203. //发布时间
  1204. script_time := ""
  1205. if qu.IntAll(luaconfig["type_time"]) == 0 { //向导模式
  1206. time := luaconfig["param_time"].([]interface{})
  1207. script_time, _ = GetTmpModel(map[string][]interface{}{
  1208. "time": time,
  1209. })
  1210. } else { //专家模式
  1211. script_time = luaconfig["str_time"].(string)
  1212. }
  1213. //列表页
  1214. script_list := ""
  1215. if qu.IntAll(luaconfig["type_list"]) == 0 { //向导模式
  1216. list := luaconfig["param_list"].([]interface{})
  1217. addrs := strings.Split(list[1].(string), "\n")
  1218. if len(addrs) > 0 {
  1219. for k, v := range addrs {
  1220. addrs[k] = "'" + v + "'"
  1221. }
  1222. list[1] = strings.Join(addrs, ",")
  1223. } else {
  1224. list[1] = ""
  1225. }
  1226. script_list, _ = GetTmpModel(map[string][]interface{}{
  1227. "list": list,
  1228. "listcheck": []interface{}{luaconfig["listcheck"]},
  1229. })
  1230. } else { //专家模式
  1231. script_list = luaconfig["str_list"].(string)
  1232. }
  1233. //三级页
  1234. script_content := ""
  1235. if qu.IntAll(luaconfig["type_content"]) == 0 { //向导模式
  1236. content := luaconfig["param_content"].([]interface{})
  1237. script_content, _ = GetTmpModel(map[string][]interface{}{
  1238. "content": content,
  1239. "contentcheck": []interface{}{luaconfig["contentcheck"]},
  1240. })
  1241. } else { //专家模式
  1242. script_content = luaconfig["str_content"].(string)
  1243. }
  1244. script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"], luaconfig["spidercompete"], luaconfig["infoformat"])
  1245. script += `
  1246. ` + script_time + `
  1247. ` + script_list + `
  1248. ` + script_content
  1249. script = ReplaceModel(script, common, luaconfig["model"].(map[string]interface{}))
  1250. }
  1251. return script
  1252. }
  1253. //生成爬虫脚本
  1254. func GetTmpModel(param map[string][]interface{}) (script string, err interface{}) {
  1255. qu.Try(func() {
  1256. //param_common拼接
  1257. if param != nil && param["common"] != nil {
  1258. if len(param["common"]) < 12 {
  1259. err = "公共参数配置不全"
  1260. } else {
  1261. script = fmt.Sprintf(util.Tmp_common, param["common"]...)
  1262. }
  1263. }
  1264. //发布时间拼接
  1265. if param != nil && param["time"] != nil {
  1266. if len(param["time"]) < 3 {
  1267. err = "方法:time-参数配置不全"
  1268. } else {
  1269. script += fmt.Sprintf(util.Tmp_pubtime, param["time"]...)
  1270. }
  1271. }
  1272. //列表页拼接
  1273. if param != nil && param["list"] != nil {
  1274. if len(param["list"]) < 7 {
  1275. err = "方法:list-参数配置不全"
  1276. } else {
  1277. list := []interface{}{param["listcheck"][0]}
  1278. list = append(list, param["list"]...)
  1279. script += fmt.Sprintf(util.Tmp_pagelist, list...)
  1280. script = strings.Replace(script, "#pageno#", `"..tostring(pageno).."`, -1)
  1281. }
  1282. }
  1283. //详情页拼接
  1284. if param != nil && param["content"] != nil {
  1285. if len(param["content"]) < 2 {
  1286. err = "方法:content-参数配置不全"
  1287. } else {
  1288. content := []interface{}{param["contentcheck"][0]}
  1289. content = append(content, param["content"]...)
  1290. script += fmt.Sprintf(util.Tmp_content, content...)
  1291. }
  1292. }
  1293. }, func(e interface{}) {
  1294. err = e
  1295. })
  1296. return script, err
  1297. }
  1298. //补充模型
  1299. func ReplaceModel(script string, comm []interface{}, model map[string]interface{}) string {
  1300. defer qu.Catch()
  1301. //补充通用信息
  1302. commstr := `item["spidercode"]="` + comm[0].(string) + `";`
  1303. commstr += `item["site"]="` + comm[1].(string) + `";`
  1304. commstr += `item["channel"]="` + comm[2].(string) + `";`
  1305. script = strings.Replace(script, "--Common--", commstr, -1)
  1306. //补充模型信息
  1307. modelstr := ""
  1308. for k, v := range model {
  1309. modelstr += `item["` + k + `"]="` + v.(string) + `";`
  1310. }
  1311. script = strings.Replace(script, "--Model--", modelstr, -1)
  1312. return script
  1313. }
  1314. //爬虫信息提交编辑器(心跳)
  1315. func SpiderInfoSend() {
  1316. time.Sleep(15 * time.Second)
  1317. list := []interface{}{}
  1318. Allspiders.Range(func(key, value interface{}) bool {
  1319. v := value.(*Spider)
  1320. info := map[string]interface{}{}
  1321. info["code"] = v.Code
  1322. info["todayDowncount"] = v.TodayDowncount
  1323. info["toDayRequestNum"] = v.ToDayRequestNum
  1324. info["yesterdayDowncount"] = v.YesterdayDowncount
  1325. info["yestoDayRequestNum"] = v.YestoDayRequestNum
  1326. info["totalDowncount"] = v.TotalDowncount
  1327. info["totalRequestNum"] = v.TotalRequestNum
  1328. info["errorNum"] = v.ErrorNum
  1329. info["roundCount"] = v.RoundCount
  1330. info["runRate"] = v.SpiderRunRate
  1331. info["lastHeartbeat"] = v.LastHeartbeat
  1332. info["lastDowncount"] = v.LastDowncount
  1333. info["lstate"] = v.L.Status(v.L)
  1334. list = append(list, info)
  1335. return true
  1336. })
  1337. bs, _ := json.Marshal(list)
  1338. value := url.Values{
  1339. "data": []string{util.Se.EncodeString(string(bs))},
  1340. "type": []string{"info"},
  1341. }
  1342. _, err := http.PostForm(util.Config.Editoraddr, value)
  1343. if err != nil {
  1344. logger.Error("send to editor: ", err.Error())
  1345. }
  1346. util.TimeAfterFunc(5*time.Minute, SpiderInfoSend, TimeChan)
  1347. }
  1348. //保存心跳信息
  1349. func SaveHeartInfo() {
  1350. time.Sleep(20 * time.Minute)
  1351. num := 0
  1352. SpiderHeart.Range(func(key, value interface{}) bool {
  1353. code := key.(string)
  1354. sp, spiderOk := LoopListPath.Load(code)
  1355. if spiderOk && sp != nil {
  1356. heart, heartOk := value.(*Heart)
  1357. if heartOk {
  1358. num++
  1359. update := []map[string]interface{}{}
  1360. update = append(update, map[string]interface{}{"code": code})
  1361. update = append(update, map[string]interface{}{"$set": map[string]interface{}{
  1362. "site": heart.Site,
  1363. "channel": heart.Channel,
  1364. "list": heart.ListHeart,
  1365. "findlist": heart.FindListHeart,
  1366. "detail": heart.DetailHeart,
  1367. "detailexecute": heart.DetailExecuteHeart,
  1368. "modifyuser": heart.ModifyUser,
  1369. "event": util.Config.Uploadevent,
  1370. "updatetime": time.Now().Unix(),
  1371. "del": false,
  1372. }})
  1373. UpdataHeartCache <- update
  1374. }
  1375. } else {
  1376. SpiderHeart.Delete(key)
  1377. }
  1378. return true
  1379. })
  1380. logger.Info("更新心跳个数:", num)
  1381. time.AfterFunc(20*time.Minute, SaveHeartInfo)
  1382. }
// SpiderCodeSendToEditor logs that a spider code on the 7000 node was moved
// to the incremental node (保存7000节点爬虫转增量节点日志): it saves a
// "luamovelog" document with ok=false. The HTTP notification to the editor
// service below is currently disabled and kept only for reference.
func SpiderCodeSendToEditor(code string) {
	defer qu.Catch()
	MgoEB.Save("luamovelog", map[string]interface{}{
		"code":       code,
		"comeintime": time.Now().Unix(),
		"ok":         false,
	})
	// Disabled: retry up to 3 times to POST the moved code to the editor,
	// then record the send result. Left commented out intentionally.
	//ok := false
	//for i := 1; i <= 3; i++ {
	//	logger.Info("Code:", code, " times:", i, " Send Move Event")
	//	list := []interface{}{}
	//	list = append(list, code)
	//	bs, _ := json.Marshal(list)
	//	value := url.Values{
	//		"data": []string{util.Se.EncodeString(string(bs))},
	//		"type": []string{"code"},
	//	}
	//	res, err := http.PostForm(util.Config.Editoraddr, value)
	//	if err != nil {
	//		logger.Error("Send To Editor For Move Event Failed,Code:", code)
	//	} else {
	//		if res != nil {
	//			res.Body.Close()
	//		}
	//		ok = true
	//		break
	//	}
	//}
	//logger.Info("Code:", code, " Send Move Event:", ok)
	//MgoEB.Save("luamovelog", map[string]interface{}{
	//	"code":       code,
	//	"comeintime": time.Now().Unix(),
	//	"type":       "sendfail",
	//	"ok":         ok,
	//})
}