12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457 |
- package spider
- import (
- "bufio"
- "encoding/json"
- "errors"
- "fmt"
- "github.com/cjoudrey/gluahttp"
- lujson "github.com/yuin/gopher-json"
- "net/http"
- "net/url"
- "os"
- "path/filepath"
- qu "qfw/util"
- "regexp"
- util "spiderutil"
- "strings"
- "sync"
- "time"
- "github.com/donnie4w/go-logger/logger"
- "github.com/yuin/gopher-lua"
- )
- var SpiderHeart sync.Map = sync.Map{} //爬虫心跳
- var Allspiders sync.Map = sync.Map{}
- var Allspiders2 sync.Map = sync.Map{}
- var LoopListPath sync.Map = sync.Map{}
- //var ChanDels = map[int]string{}
- //var lock sync.Mutex
- var CC chan *lua.LState
- var CC2 chan *lua.LState
- var Chansize int
- var regcode, _ = regexp.Compile(`="(.*)"`)
- var InitCount int
- var InitAllLuaOver = make(chan bool, 1) //所有脚本是否加载完毕
- func InitSpider() {
- scriptMap := getSpiderScriptDB("all") //加载爬虫,初始化模板
- scriptMapFile := getSpiderScriptFile(false)
- for code, v := range scriptMap {
- LoopListPath.Store(code, v)
- InitCount++
- }
- for code, v := range scriptMapFile {
- LoopListPath.Store(code, v)
- InitCount++
- }
- if util.Config.Working == 0 {
- NoQueueScript() //高性能模式
- } else {
- if util.Config.Modal == 0 { //原始模式
- QueueUpScriptList()
- } else { //列表页和三级页分开采集
- go QueueUpScriptList() //节能模式列表页
- go QueueUpScriptDetail() //节能模式三级页
- }
- }
- }
- //高性能模式
- func NoQueueScript() {
- list, _ := MgoS.Find("spider_ldtime", nil, nil, map[string]interface{}{"code": 1, "uplimit": 1, "lowlimit": 1}, false, -1, -1)
- LoopListPath.Range(func(key, temp interface{}) bool {
- if info, ok := temp.(map[string]string); ok {
- code := info["code"]
- script := info["script"]
- sp, errstr := CreateSpider(code, script, true, false)
- if errstr == "" && sp != nil && sp.Code != "nil" { //脚本加载成功
- //sp.Index = qu.IntAll(key)
- //sp2.Index = qu.IntAll(key)
- if info["createuser"] != "" {
- sp.UserName = info["createuser"]
- }
- if info["createuseremail"] != "" {
- sp.UserEmail = info["createuseremail"]
- }
- sp.MUserName = info["modifyuser"]
- sp.MUserEmail = info["modifyemail"]
- Allspiders.Store(sp.Code, sp)
- for _, tmp := range *list {
- if qu.ObjToString(tmp["code"]) == sp.Code {
- sp.UpperLimit = qu.IntAll(tmp["uplimit"])
- //sp2.UpperLimit = qu.IntAll(tmp["uplimit"])
- sp.LowerLimit = qu.IntAll(tmp["lowlimit"])
- //sp2.LowerLimit = qu.IntAll(tmp["lowlimit"])
- break
- }
- }
- if util.Config.Modal == 1 { //列表页、三级页分开采集模式
- sp2, _ := CreateSpider(code, script, true, false)
- sp2.UserName = sp.UserName
- sp2.UserEmail = sp.UserEmail
- sp2.MUserName = sp.MUserName
- sp2.MUserEmail = sp.MUserEmail
- sp2.IsMainThread = true //多线程采集时使用
- Allspiders2.Store(sp.Code, sp2)
- }
- sp.StartJob()
- //util.TimeSleepFunc(10*time.Millisecond, TimeSleepChan)
- } else {
- logger.Info(code, "脚本加载失败,请检查!")
- nowT := time.Now().Unix()
- username := "异常"
- if sp != nil {
- username = sp.MUserName
- }
- MgoS.Update("spider_loadfail",
- map[string]interface{}{
- "code": code,
- "modifytime": map[string]interface{}{
- "$gte": nowT - 12*3600,
- "$lte": nowT + 12*3600,
- },
- },
- map[string]interface{}{
- "$set": map[string]interface{}{
- "code": code,
- "type": "初始化",
- "script": script,
- "updatetime": nowT,
- "modifyuser": username,
- "event": util.Config.Uploadevent,
- "err": errstr,
- },
- }, true, false)
- }
- time.Sleep(100 * time.Millisecond)
- }
- return true
- })
- InitAllLuaOver <- true //爬虫初始化完毕
- logger.Info("高性能模式:LUA加载完成")
- numSpider := 0
- Allspiders.Range(func(key, value interface{}) bool {
- numSpider++
- return true
- })
- logger.Info("总共加载脚本数:", numSpider)
- }
- //排队模式下载列表页数据
- func QueueUpScriptList() {
- logger.Info("节能模式列表页")
- CC = make(chan *lua.LState, util.Config.Chansize)
- for i := 0; i < util.Config.Chansize; i++ { //目前初始化Allspiders,Allspiders2两个爬虫池,线程乘2
- CC <- lua.NewState(lua.Options{
- RegistrySize: 256 * 20,
- CallStackSize: 256,
- IncludeGoStackTrace: false,
- })
- }
- for {
- listLen, listNoLen, DelLen := 0, 0, 0
- logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环", "初始化脚本数量:", InitCount)
- LoopListPath.Range(func(key, temp interface{}) bool {
- if info, ok := temp.(map[string]string); ok {
- code := info["code"]
- old_is_running := false
- tmp, b := Allspiders.Load(code)
- if b {
- if sp_old, ok := tmp.(*Spider); ok {
- if !sp_old.Stop {
- old_is_running = true
- }
- }
- }
- logger.Info("Code:", code, "Is Downloading List:", old_is_running)
- if !old_is_running { //判断当前爬虫是否正在执行
- script := info["script"]
- sp, errstr := CreateSpider(code, script, false, false)
- //logger.Info("初始化脚本是否成功:", sp != nil, e.Value)
- if errstr == "" && sp != nil && sp.Code != "nil" { //初始化脚本成功
- //sp.Index = qu.IntAll(key)
- sp.UserName = info["createuser"]
- sp.UserEmail = info["createuseremail"]
- sp.MUserName = info["modifyuser"]
- sp.MUserEmail = info["modifyemail"]
- Allspiders.Store(code, sp)
- sp.StartJob()
- } else {
- nowT := time.Now().Unix()
- username := "异常"
- if sp != nil {
- username = sp.MUserName
- }
- MgoS.Update("spider_loadfail",
- map[string]interface{}{
- "code": code,
- "modifytime": map[string]interface{}{
- "$gte": nowT - 12*3600,
- "$lte": nowT + 12*3600,
- },
- },
- map[string]interface{}{
- "$set": map[string]interface{}{
- "code": code,
- "type": "初始化",
- "script": script,
- "updatetime": nowT,
- "modifyuser": username,
- "event": util.Config.Uploadevent,
- "err": errstr,
- },
- }, true, false)
- }
- if sp != nil && sp.IsHistoricalMend { //下载历史的爬虫执行一次后删除
- DelLen++
- LoopListPath.Delete(key)
- b = MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"state": 6}}, false, false)
- logger.Debug("Delete History Code:", code, b)
- }
- }
- listLen++
- } else {
- logger.Info("Code:", key, "Is Not Download List")
- listNoLen++
- }
- time.Sleep(100 * time.Millisecond)
- return true
- })
- time.Sleep(1 * time.Second)
- count_ok, count_no := 0, 0
- LoopListPath.Range(func(k, v interface{}) bool {
- if v != nil {
- count_ok++
- } else {
- count_no++
- }
- return true
- })
- InitCount = count_ok
- logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载列表页执行死循环,列表长度,", listLen, listNoLen, "删除数量", DelLen, "执行完毕后数量统计:", count_ok, count_no)
- }
- }
- //排队模式下载三级页数据
- func QueueUpScriptDetail() {
- logger.Info("节能模式三级页")
- chanSize := util.Config.DetailChansize
- CC2 = make(chan *lua.LState, chanSize)
- for i := 0; i < chanSize; i++ { //目前初始化Allspiders,Allspiders2两个爬虫池,线程乘2
- CC2 <- lua.NewState(lua.Options{
- RegistrySize: 256 * 20,
- CallStackSize: 256,
- IncludeGoStackTrace: false,
- })
- }
- for {
- count_ok, count_no := 0, 0
- logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环", "初始化脚本数量:", InitCount)
- LoopListPath.Range(func(key, temp interface{}) bool {
- if info, ok := temp.(map[string]string); ok {
- count_ok++
- code := info["code"]
- old_is_running := false
- tmp, b := Allspiders2.Load(code)
- if b {
- if sp_old, ok := tmp.(*Spider); ok {
- if !sp_old.Stop {
- old_is_running = true
- }
- }
- }
- logger.Info("Code:", code, "Is Downloading Detail:", old_is_running)
- if !old_is_running { //判断当前爬虫是否正在执行
- script := info["script"]
- sp, errstr := CreateSpider(code, script, true, false)
- if errstr == "" && sp != nil && sp.Code != "nil" { //初始化脚本成功
- //sp.Index = qu.IntAll(key)
- sp.UserName = info["createuser"]
- sp.UserEmail = info["createuseremail"]
- sp.MUserName = info["modifyuser"]
- sp.MUserEmail = info["modifyemail"]
- sp.IsMainThread = true
- Allspiders2.Store(code, sp)
- go sp.DownloadListDetail(false) //下载三级页信息
- }
- }
- } else {
- logger.Info("Code:", key, "Is Not Download Detail")
- count_no++
- }
- time.Sleep(100 * time.Millisecond)
- return true
- })
- InitCount = count_ok
- time.Sleep(1 * time.Second)
- logger.Warn(time.Now().Format(qu.Date_Full_Layout), ":下载三级页执行死循环完毕,数量统计:", count_ok, count_no)
- }
- }
- //获取所有爬虫脚本--数据库
- func getSpiderScriptDB(code string) map[string]map[string]string {
- scriptSpider := map[string]map[string]string{}
- query := map[string]interface{}{}
- if code == "all" { //初始化所有脚本
- query = map[string]interface{}{"state": 5, "event": util.Config.Uploadevent}
- } else { //消息在线上传
- query = map[string]interface{}{"code": code, "event": util.Config.Uploadevent}
- //query = `{"$or":[{"iupload":1},{"iupload":3}],"event":` + fmt.Sprint(util.Config.Uploadevent) + `,"modifytime":{"$gt":1502937042}}`
- }
- listdb, _ := MgoEB.Find("luaconfig", query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
- //临时历史附件
- //listdb, _ := MgoEB.Find("luaconfig_test", query, map[string]interface{}{"_id": -1}, nil, false, -1, -1)
- for _, v := range *listdb {
- old := qu.IntAll(v["old_lua"])
- script := ""
- if old == 1 {
- script = fmt.Sprint(v["luacontent"])
- } else {
- if v["oldlua"] != nil {
- if v["luacontent"] != nil {
- script = v["luacontent"].(string)
- }
- } else {
- script = GetScriptByTmp(v)
- }
- }
- scriptSpider[fmt.Sprint(v["code"])] = map[string]string{
- "code": fmt.Sprint(v["code"]),
- "type": fmt.Sprint(v["state"]),
- "script": script,
- "createuser": fmt.Sprint(v["createuser"]),
- "createuseremail": fmt.Sprint(v["createuseremail"]),
- "modifyuser": fmt.Sprint(v["modifyuser"]),
- "modifyemail": fmt.Sprint(v["next"]),
- }
- }
- return scriptSpider
- }
- //获取所有爬虫脚本--文件
- func getSpiderScriptFile(newscript bool) map[string]map[string]string {
- scriptSpider := map[string]map[string]string{}
- filespider := 0
- filepath.Walk("res", func(path string, info os.FileInfo, err error) error {
- if info.IsDir() {
- return nil
- } else if strings.HasPrefix(info.Name(), "spider_") &&
- strings.HasSuffix(info.Name(), ".lua") {
- //过滤test目录
- if strings.Contains(path, "\\test\\") {
- return nil
- }
- loadfile := true
- if newscript {
- if time.Now().Unix() < info.ModTime().Add(time.Duration(15)*time.Minute).Unix() {
- loadfile = true
- } else {
- loadfile = false
- }
- }
- if loadfile {
- f, err := os.Open(path)
- defer f.Close()
- if err != nil {
- logger.Error(err.Error())
- }
- buf := bufio.NewReader(f)
- script := ""
- code := ""
- for {
- line, err := buf.ReadString('\n')
- if code == "" && strings.Contains(line, "spiderCode=") {
- res := regcode.FindAllStringSubmatch(line, -1)
- if len(res) > 0 {
- code = res[0][1]
- //logger.Info("code", code)
- } else {
- break
- }
- }
- if scriptSpider[code] == nil {
- script = script + line + "\n"
- } else {
- break
- }
- if err != nil {
- break
- }
- }
- if code != "" && script != "" && scriptSpider[code] == nil {
- scriptSpider[code] = map[string]string{
- "code": code,
- "type": "5",
- "script": script,
- //脚本文件属性值空
- "createuser": "",
- "createuseremail": "",
- "modifyuser": "",
- "modifyemail": "",
- }
- filespider = filespider + 1
- //logger.Info("script", script)
- }
- }
- }
- return nil
- })
- logger.Info("节点", util.Config.Uploadevent, "脚本文件爬虫数", filespider)
- return scriptSpider
- }
- //脚本下架、上架、重载
- func UpdateSpiderByCodeState(code, state string) (bool, error) {
- up := false
- var err error
- if state != "5" && state != "-1" { //脚本下架
- SpiderHeart.Delete(code) //脚本下架,删除脚本对应心跳
- logger.Info("下架脚本", code)
- if util.Config.Working == 1 { //队列模式
- for i, as := range []sync.Map{Allspiders, Allspiders2} {
- if i == 1 && util.Config.Modal == 0 { //队列模式原始模式采集Allspiders2无用(7700下架爬虫)
- break
- }
- tmp, b := as.Load(code)
- if b {
- sp, ok := tmp.(*Spider)
- if ok {
- if !sp.Stop { //脚本未执行
- sp.Stop = true
- }
- }
- as.Delete(code)
- logger.Info("下架脚本,Allspiders删除")
- }
- }
- //LoopListPath.Range(func(k, v interface{}) bool {
- // //if v != nil {
- // // info, _ := v.(map[string]string)
- // // if info["code"] == code {
- // // LoopListPath.Store(k, nil)
- // // lock.Lock()
- // // defer lock.Unlock()
- // // ChanDels[qu.IntAll(k)] = code
- // // logger.Info("下架脚本,LoopListPath更新为nil,ChanDels中位置:", k)
- // // }
- // //}
- // if k == code {
- // LoopListPath.Delete(k)
- // logger.Info(code, "脚本下架成功")
- // return false //跳出循环
- // }
- // return true
- //})
- } else { //高性能模式
- for _, as := range []sync.Map{Allspiders, Allspiders2} {
- if tmp, ok := as.Load(code); ok {
- sp, ok := tmp.(*Spider)
- if ok {
- sp.Stop = true
- sp.L.Close()
- as.Delete(code)
- }
- }
- }
- }
- LoopListPath.Delete(code)
- logger.Info(code, "脚本下架成功")
- up = true
- err = nil
- } else if state == "-1" { //爬虫重采更新线上爬虫
- scriptMap := getSpiderScriptDB(code)
- logger.Info("更新线上脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
- if util.Config.Working == 1 { //排队模式
- for _, v := range scriptMap {
- listsize := 0
- listHas := false
- count_ok, count_no := 0, 0
- LoopListPath.Range(func(key, val interface{}) bool {
- listsize++
- if tmp, ok := val.(map[string]string); ok {
- count_ok++
- if tmp["code"] == code && key == code { //队列存在,重载脚本
- logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
- listHas = true
- LoopListPath.Store(key, v)
- UpdateHighListDataByCode(code) //爬虫更新上架后,重置数据state=0
- logger.Info("队列模式更新列表页信息状态", code)
- }
- } else {
- count_no++
- }
- return true
- })
- logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
- if !listHas { //队列不存在
- logger.Info("重采更新爬虫失败:", code)
- up = false
- err = errors.New("爬虫不在线:" + code)
- } else {
- up = true
- err = nil
- logger.Info("重采更新爬虫成功", code)
- }
- }
- } else { //高性能模式
- for k, v := range scriptMap {
- if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
- sp := spd.(*Spider)
- sp.ScriptFile = v["script"]
- if v["createuser"] != "" {
- sp.UserName = v["createuser"]
- }
- if v["createuseremail"] != "" {
- sp.UserEmail = v["createuseremail"]
- }
- sp.MUserName = v["modifyuser"]
- sp.MUserEmail = v["modifyemail"]
- Allspiders.Store(k, sp)
- up = true
- err = nil
- logger.Info("重采更新爬虫成功", sp.Code)
- } else { //不存在
- up = false
- err = errors.New("爬虫不在线:" + code)
- logger.Info("重采更新爬虫失败:", code)
- }
- //Allspiders2
- if spd2, ok2 := Allspiders2.Load(k); ok2 { //对应脚本已存在,更新
- sp2 := spd2.(*Spider)
- sp2.ScriptFile = v["script"]
- if v["createuser"] != "" {
- sp2.UserName = v["createuser"]
- }
- if v["createuseremail"] != "" {
- sp2.UserEmail = v["createuseremail"]
- }
- sp2.MUserName = v["modifyuser"]
- sp2.MUserEmail = v["modifyemail"]
- sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) //更新上架,重载脚本
- Allspiders2.Store(k, sp2)
- // up = true
- // err = nil
- logger.Info("Allspiders2重采更新爬虫成功", sp2.Code)
- } else { //不存在
- // up = false
- // err = errors.New("爬虫不在线:" + code)
- logger.Info("Allspiders2重采更新爬虫失败:", code)
- }
- }
- }
- } else { //脚本上架
- scriptMap := getSpiderScriptDB(code)
- logger.Info("上架新增脚本,库中是否已存在该脚本:", code, len(scriptMap) > 0, scriptMap[code] != nil)
- if util.Config.Working == 1 { //排队模式
- for _, v := range scriptMap {
- listsize := 0
- listHas := false
- count_ok, count_no := 0, 0
- LoopListPath.Range(func(key, val interface{}) bool {
- listsize++
- if tmp, ok := val.(map[string]string); ok { //此处判断仅仅为了得到count_ok的值,可直接判断key==code
- count_ok++
- if tmp["code"] == code && code == key { //队列存在,重载脚本
- logger.Info("上架新增脚本,队列中以有该脚本,进行更新")
- listHas = true
- LoopListPath.Store(key, v)
- UpdateHighListDataByCode(code) //爬虫更新上架后,重置数据state=0
- logger.Info("队列模式更新列表页信息状态", code)
- }
- } else {
- count_no++
- }
- return true
- })
- logger.Info("上架新增脚本,队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
- if !listHas { //队列中不存在,新增
- logger.Info("上架新增脚本,队列中不存在")
- LoopListPath.Store(code, v) //上架
- // lock.Lock()
- // defer lock.Unlock()
- // if len(ChanDels) > 0 {
- // for i, _ := range ChanDels {
- // logger.Info("上架新增脚本,替补队列中位置", i)
- // LoopListPath.Store(i, v)
- // delete(ChanDels, i)
- // break
- // }
- // } else {
- // logger.Info("上架新增脚本,新增队列中位置", listsize)
- // LoopListPath.Store(listsize, v) //上架
- // }
- //校验是否上架成功
- saveList := false //记录是否上架成功
- listsize, count_ok, count_no = 0, 0, 0
- LoopListPath.Range(func(key, val interface{}) bool {
- listsize++
- if tmp, ok := val.(map[string]string); ok {
- count_ok++
- if tmp["code"] == code && key == code { //队列存在
- saveList = true
- logger.Info("上架脚本成功", code)
- }
- } else {
- count_no++
- }
- return true
- })
- logger.Info("上架爬虫后队列中共有爬虫", listsize, "当前在线数量:", count_ok, "下线数量:", count_no)
- if !saveList { //上架失败
- logger.Info("上架脚本", code, " 失败")
- return false, errors.New("use " + code + " failed")
- }
- }
- logger.Info("上架新增脚本", code)
- up = true
- }
- } else { //高性能模式
- for k, v := range scriptMap {
- LoopListPath.Store(k, v)
- //1、Allspiders对应7000、7100、7400脚本上架下载数据(列表页爬虫集合)
- if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
- sp := spd.(*Spider)
- sp.ScriptFile = v["script"]
- sp.UserName = v["createuser"]
- sp.UserEmail = v["createuseremail"]
- sp.MUserName = v["modifyuser"]
- sp.MUserEmail = v["modifyemail"]
- UpdateSpider(sp, k, v["script"]) //爬虫其他信息更新
- //sp.LoadScript(&sp.Name, &sp.Channel, &sp.MUserName, k, sp.ScriptFile, true, false) //更新上架,重载脚本
- Allspiders.Store(k, sp)
- up = true
- err = nil
- logger.Info("上架重载脚本", sp.Code)
- } else { //新增脚本
- sp, errstr := CreateSpider(k, v["script"], true, false)
- if errstr == "" && sp != nil && sp.Code != "nil" {
- sp.UserName = v["createuser"]
- sp.UserEmail = v["createuseremail"]
- sp.MUserName = v["modifyuser"]
- sp.MUserEmail = v["modifyemail"]
- Allspiders.Store(k, sp)
- sp.StartJob()
- up = true
- err = nil
- logger.Info("上架新增脚本", sp.Code)
- } else {
- err = errors.New("新增失败")
- nowT := time.Now().Unix()
- MgoS.Update("spider_loadfail",
- map[string]interface{}{
- "code": k,
- "modifytime": map[string]interface{}{
- "$gte": nowT - 12*3600,
- "$lte": nowT + 12*3600,
- },
- },
- map[string]interface{}{
- "$set": map[string]interface{}{
- "code": k,
- "type": "新增初始化脚本",
- "script": v["script"],
- "updatetime": nowT,
- "modifyuser": sp.MUserName,
- "event": util.Config.Uploadevent,
- "err": errstr,
- },
- }, true, false)
- }
- }
- //2、Allspiders2对应7100、7110、7400上架采集三级页数据(Allspiders2三级页爬虫集合)
- if util.Config.Modal == 1 {
- //Allspiders2
- if spd2, ok2 := Allspiders2.Load(k); ok2 { //对应脚本已存在,更新
- sp2 := spd2.(*Spider)
- sp2.ScriptFile = v["script"]
- sp2.UserName = v["createuser"]
- sp2.UserEmail = v["createuseremail"]
- sp2.MUserName = v["modifyuser"]
- sp2.MUserEmail = v["modifyemail"]
- UpdateSpider(sp2, k, v["script"]) //爬虫其他信息更新
- sp2.LoadScript(&sp2.Name, &sp2.Channel, &sp2.MUserName, k, sp2.ScriptFile, true, false) //更新上架,重载脚本
- Allspiders2.Store(k, sp2) //重载后放入集合
- UpdateHighListDataByCode(k) //爬虫更新上架后,重置数据state=0
- // up = true
- // err = nil
- logger.Info("Allspiders2上架重载脚本", sp2.Code)
- } else { //新增脚本
- sp2, errstr := CreateSpider(k, v["script"], true, false)
- if errstr == "" && sp2 != nil && sp2.Code != "nil" {
- sp2.UserName = v["createuser"]
- sp2.UserEmail = v["createuseremail"]
- sp2.MUserName = v["modifyuser"]
- sp2.MUserEmail = v["modifyemail"]
- sp2.IsMainThread = true //多线程采集时使用
- go sp2.DownloadHighDetail(true) //根据列表页数据下载三级页
- Allspiders2.Store(k, sp2)
- // up = true
- // err = nil
- logger.Info("Allspiders2上架新增脚本", sp2.Code)
- } /*else {
- err = errors.New("新增失败")
- mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
- "code": k,
- "type": "新增脚本失败",
- "script": v["script"],
- "intime": time.Now().Format(qu.Date_Full_Layout),
- "event": util.Config.Uploadevent,
- })
- }*/
- }
- }
- }
- }
- }
- logger.Info("上下架:", up, err)
- return up, err
- }
- //定时重载脚本文件
- func ReloadSpiderFile() {
- scriptMap := getSpiderScriptFile(true)
- for k, v := range scriptMap {
- for i, as := range []sync.Map{Allspiders, Allspiders2} {
- if i == 1 && util.Config.Modal == 0 { //队列模式原始模式采集Allspiders2无用
- continue
- }
- if spd, ok := as.Load(k); ok { //对应脚本已存在,更新
- sp := spd.(*Spider)
- logger.Info("定时重载脚本", sp.Code)
- sp.ScriptFile = v["script"]
- if v["createuser"] != "" {
- sp.UserName = v["createuser"]
- }
- if v["createuseremail"] != "" {
- sp.UserEmail = v["createuseremail"]
- }
- sp.MUserName = v["modifyuser"]
- sp.MUserEmail = v["modifyemail"]
- as.Store(k, sp)
- } else { //新增脚本
- var sp *Spider
- var errstr string
- if util.Config.Working == 1 { //排队模式
- if i == 0 {
- //length := 0
- //LoopListPath.Range(func(k, v interface{}) bool {
- // length++
- // return true
- //})
- LoopListPath.Store(k, v) //排队模式Allspiders,Allspiders2共用一个LoopListPath,新增一次即可
- sp, errstr = CreateSpider(k, v["script"], false, false)
- } else {
- sp, errstr = CreateSpider(k, v["script"], true, false)
- }
- } else {
- sp, errstr = CreateSpider(k, v["script"], true, false)
- }
- if errstr == "" && sp != nil && sp.Code != "nil" {
- if v["createuser"] != "" {
- sp.UserName = v["createuser"]
- }
- if v["createuseremail"] != "" {
- sp.UserEmail = v["createuseremail"]
- }
- sp.MUserName = v["modifyuser"]
- sp.MUserEmail = v["modifyemail"]
- as.Store(k, sp)
- if util.Config.Working == 1 {
- sp.Stop = true
- // if i == 0 {
- // length := 0
- // LoopListPath.Range(func(k, v interface{}) bool {
- // length++
- // return true
- // })
- // LoopListPath.Store(length, v)
- // }
- } else {
- sp.Stop = false
- if i == 0 { //高性能模式只有Allspiders启动爬虫,Allspiders2只负责下三级页
- sp.StartJob()
- }
- }
- logger.Info("定时重载脚本--新增", sp.Code)
- } else {
- if i == 0 {
- nowT := time.Now().Unix()
- MgoS.Update("spider_loadfail",
- map[string]interface{}{
- "code": k,
- "modifytime": map[string]interface{}{
- "$gte": nowT - 12*3600,
- "$lte": nowT + 12*3600,
- },
- },
- map[string]interface{}{
- "$set": map[string]interface{}{
- "code": k,
- "type": "定时重载--新增失败",
- "script": v["script"],
- "updatetime": nowT,
- "modifyuser": sp.MUserName,
- "event": util.Config.Uploadevent,
- "err": errstr,
- },
- }, true, false)
- }
- }
- }
- }
- // if spd, ok := Allspiders.Load(k); ok { //对应脚本已存在,更新
- // sp := spd.(*Spider)
- // logger.Info("定时重载脚本", sp.Code)
- // sp.ScriptFile = v["script"]
- // if v["createuser"] != "" {
- // sp.UserName = v["createuser"]
- // }
- // if v["createuseremail"] != "" {
- // sp.UserEmail = v["createuseremail"]
- // }
- // sp.MUserName = v["modifyuser"]
- // sp.MUserEmail = v["modifyemail"]
- // Allspiders.Store(k, sp)
- // } else { //新增脚本
- // var sp *Spider
- // if util.Config.Working == 1 { //排队模式
- // length := 0
- // LoopListPath.Range(func(k, v interface{}) bool {
- // length++
- // return true
- // })
- // LoopListPath.Store(length, v)
- // sp = CreateSpider(k, v["script"], false,false)
- // } else {
- // sp = NewSpider(k, v["script"])
- // }
- // if sp != nil && sp.Code != "nil" {
- // if v["createuser"] != "" {
- // sp.UserName = v["createuser"]
- // }
- // if v["createuseremail"] != "" {
- // sp.UserEmail = v["createuseremail"]
- // }
- // sp.MUserName = v["modifyuser"]
- // sp.MUserEmail = v["modifyemail"]
- // Allspiders.Store(k, sp)
- // if util.Config.Working == 1 {
- // sp.Stop = true
- // length := 0
- // LoopListPath.Range(func(k, v interface{}) bool {
- // length++
- // return true
- // })
- // LoopListPath.Store(length, v)
- // } else {
- // sp.Stop = false
- // sp.StartJob()
- // }
- // logger.Info("定时重载脚本--新增", sp.Code)
- // } else {
- // mgu.Save("spider_loadfail", "spider", "spider", map[string]interface{}{
- // "code": k,
- // "type": "定时重载--新增失败",
- // "script": v["script"],
- // "intime": time.Now().Format(qu.Date_Full_Layout),
- // "event": util.Config.Uploadevent,
- // })
- // }
- // }
- }
- util.TimeAfterFunc(time.Duration(15)*time.Minute, ReloadSpiderFile, TimeChan)
- }
- //生成爬虫
- func CreateSpider(code, luafile string, newstate, thread bool) (*Spider, string) {
- defer qu.Catch()
- spider := &Spider{}
- err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate, thread)
- if err != "" {
- return nil, err
- }
- spider.Code = spider.GetVar("spiderCode")
- spider.SCode = spider.Code
- spider.Name = spider.GetVar("spiderName")
- spider.Channel = spider.GetVar("spiderChannel")
- //spider.LastExecTime = GetLastExectime(spider.Code)
- spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
- spider.Collection = spider.GetVar("spider2Collection")
- spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
- //spider.Thread = int64(spider.GetIntVar("spiderThread"))
- spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
- spider.StoreMode = spider.GetIntVar("spiderStoreMode")
- spider.CoverAttr = spider.GetVar("spiderCoverAttr")
- spiderSleepBase := spider.GetIntVar("spiderSleepBase")
- if spiderSleepBase == -1 {
- spider.SleepBase = 1000
- } else {
- spider.SleepBase = spiderSleepBase
- }
- spiderSleepRand := spider.GetIntVar("spiderSleepRand")
- if spiderSleepRand == -1 {
- spider.SleepRand = 1000
- } else {
- spider.SleepRand = spiderSleepRand
- }
- spiderTimeout := spider.GetIntVar("spiderTimeout")
- if spiderTimeout == -1 {
- spider.Timeout = 60
- } else {
- spider.Timeout = int64(spiderTimeout)
- }
- spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
- spider.UserName = spider.GetVar("spiderUserName")
- spider.UserEmail = spider.GetVar("spiderUserEmail")
- spider.UploadTime = spider.GetVar("spiderUploadTime")
- //新增历史补漏
- //qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
- spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
- spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
- //新老爬虫
- spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
- //爬虫类型
- spider.Infoformat = spider.GetIntVar("spiderInfoformat")
- return spider, ""
- }
- //更新爬虫
- func UpdateSpider(spider *Spider, code, script string) {
- ts := &Spider{}
- ts.Script.L = lua.NewState(lua.Options{
- RegistrySize: 256 * 20,
- CallStackSize: 256,
- IncludeGoStackTrace: false,
- })
- defer ts.L.Close()
- ts.Script.L.PreloadModule("http", gluahttp.NewHttpModule(&http.Client{}).Loader)
- ts.Script.L.PreloadModule("json", lujson.Loader)
- if err := ts.Script.L.DoString(script); err != nil {
- logger.Debug(code + ",加载lua脚本错误:" + err.Error())
- return
- }
- spider.Channel = ts.GetVar("spiderChannel") //栏目名称
- spider.DownDetail = ts.GetBoolVar("spiderDownDetailPage") //是否下三级页
- spider.Collection = ts.GetVar("spider2Collection") //存储表
- spider.SpiderRunRate = int64(ts.GetIntVar("spiderRunRate")) //间隔时间
- spider.StoreToMsgEvent = ts.GetIntVar("spiderStoreToMsgEvent") //4002
- spider.StoreMode = ts.GetIntVar("spiderStoreMode") //2
- spider.CoverAttr = ts.GetVar("spiderCoverAttr") //title
- //下载三级页(DownloadDetailPage)随机延迟
- spiderSleepBase := ts.GetIntVar("spiderSleepBase")
- if spiderSleepBase == -1 {
- spider.SleepBase = 1000
- } else {
- spider.SleepBase = spiderSleepBase
- }
- spiderSleepRand := ts.GetIntVar("spiderSleepRand")
- if spiderSleepRand == -1 {
- spider.SleepRand = 1000
- } else {
- spider.SleepRand = spiderSleepRand
- }
- spiderTimeout := ts.GetIntVar("spiderTimeout")
- if spiderTimeout == -1 {
- spider.Timeout = 60
- } else {
- spider.Timeout = int64(spiderTimeout)
- }
- spider.TargetChannelUrl = ts.GetVar("spiderTargetChannelUrl") //栏目地址
- //新增历史补漏
- spider.IsHistoricalMend = ts.GetBoolVar("spiderIsHistoricalMend")
- spider.IsMustDownload = ts.GetBoolVar("spiderIsMustDownload")
- //新老爬虫
- spider.IsCompete = ts.GetBoolVar("spiderIsCompete")
- //爬虫类型
- spider.Infoformat = spider.GetIntVar("spiderInfoformat")
- }
- //排队模式生成爬虫
- func NewSpider_New(code, luafile string, newstate bool) (*Spider, string) {
- defer qu.Catch()
- spider := &Spider{}
- err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, newstate, false)
- if err != "" {
- return nil, err
- }
- spider.Code = spider.GetVar("spiderCode")
- spider.Script.SCode = spider.Code
- spider.Name = spider.GetVar("spiderName")
- spider.Channel = spider.GetVar("spiderChannel")
- //spider.LastExecTime = GetLastExectime(spider.Code)
- spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
- spider.Collection = spider.GetVar("spider2Collection")
- spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
- spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
- spider.StoreMode = spider.GetIntVar("spiderStoreMode")
- spider.CoverAttr = spider.GetVar("spiderCoverAttr")
- spiderSleepBase := spider.GetIntVar("spiderSleepBase")
- if spiderSleepBase == -1 {
- spider.SleepBase = 1000
- } else {
- spider.SleepBase = spiderSleepBase
- }
- spiderSleepRand := spider.GetIntVar("spiderSleepRand")
- if spiderSleepRand == -1 {
- spider.SleepRand = 1000
- } else {
- spider.SleepRand = spiderSleepRand
- }
- spiderTimeout := spider.GetIntVar("spiderTimeout")
- if spiderTimeout == -1 {
- spider.Timeout = 60
- } else {
- spider.Timeout = int64(spiderTimeout)
- }
- spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
- if v, ok := Allspiders.Load(spider.Code); ok {
- sp := v.(*Spider)
- spider.TodayDowncount = sp.TodayDowncount
- spider.ToDayRequestNum = sp.ToDayRequestNum
- spider.YesterdayDowncount = sp.YesterdayDowncount
- spider.YestoDayRequestNum = sp.YestoDayRequestNum
- spider.TotalDowncount = sp.TotalDowncount
- spider.TotalRequestNum = sp.TotalRequestNum
- spider.ErrorNum = sp.ErrorNum
- spider.RoundCount = sp.RoundCount
- }
- spider.UserName = spider.GetVar("spiderUserName")
- spider.UserEmail = spider.GetVar("spiderUserEmail")
- spider.UploadTime = spider.GetVar("spiderUploadTime")
- //新增历史补漏
- spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
- spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
- //新老爬虫
- spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
- //爬虫类型
- spider.Infoformat = spider.GetIntVar("spiderInfoformat")
- return spider, ""
- }
- //高性能模式生成爬虫
- func NewSpider(code, luafile string) (*Spider, string) {
- defer qu.Catch()
- spider := &Spider{}
- err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, false)
- if err != "" {
- return nil, err
- }
- spider.Code = spider.GetVar("spiderCode")
- spider.SCode = spider.Code
- spider.Name = spider.GetVar("spiderName")
- spider.Channel = spider.GetVar("spiderChannel")
- //spider.LastExecTime = GetLastExectime(spider.Code)
- spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
- spider.Collection = spider.GetVar("spider2Collection")
- spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
- //spider.Thread = int64(spider.GetIntVar("spiderThread"))
- spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
- spider.StoreMode = spider.GetIntVar("spiderStoreMode")
- spider.CoverAttr = spider.GetVar("spiderCoverAttr")
- spiderSleepBase := spider.GetIntVar("spiderSleepBase")
- if spiderSleepBase == -1 {
- spider.SleepBase = 1000
- } else {
- spider.SleepBase = spiderSleepBase
- }
- spiderSleepRand := spider.GetIntVar("spiderSleepRand")
- if spiderSleepRand == -1 {
- spider.SleepRand = 1000
- } else {
- spider.SleepRand = spiderSleepRand
- }
- spiderTimeout := spider.GetIntVar("spiderTimeout")
- if spiderTimeout == -1 {
- spider.Timeout = 60
- } else {
- spider.Timeout = int64(spiderTimeout)
- }
- spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
- date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
- tmp := GetDownloadLast(spider.Code, date) //
- if len(tmp) > 0 {
- spider.TodayDowncount = int32(qu.IntAll(tmp["todaydowncount"]))
- spider.ToDayRequestNum = int32(qu.IntAll(tmp["todaydownreq"]))
- spider.YesterdayDowncount = int32(qu.IntAll(tmp["yesdowncount"]))
- spider.YestoDayRequestNum = int32(qu.IntAll(tmp["yesdownreq"]))
- spider.TotalDowncount = spider.TodayDowncount + int32(qu.IntAll(tmp["totaldown"]))
- spider.TotalRequestNum = spider.ToDayRequestNum + int32(qu.IntAll(tmp["totalreq"]))
- }
- spider.UserName = spider.GetVar("spiderUserName")
- spider.UserEmail = spider.GetVar("spiderUserEmail")
- spider.UploadTime = spider.GetVar("spiderUploadTime")
- //新增历史补漏
- //qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
- spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
- spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
- //新老爬虫
- spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
- //爬虫类型
- spider.Infoformat = spider.GetIntVar("spiderInfoformat")
- return spider, ""
- }
- //多线程生成爬虫
- func NewSpiderForThread(code, luafile string) (*Spider, string) {
- defer qu.Catch()
- spider := &Spider{}
- err := spider.LoadScript(&spider.Name, &spider.Channel, &spider.MUserName, code, luafile, true, true)
- if err != "" {
- return nil, err
- }
- spider.Code = spider.GetVar("spiderCode")
- spider.SCode = spider.Code
- spider.Script.SCode = spider.Code
- spider.Name = spider.GetVar("spiderName")
- spider.Channel = spider.GetVar("spiderChannel")
- //spider.LastExecTime = GetLastExectime(spider.Code)
- spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
- spider.Collection = spider.GetVar("spider2Collection")
- spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
- //spider.Thread = int64(spider.GetIntVar("spiderThread"))
- spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
- spider.StoreMode = spider.GetIntVar("spiderStoreMode")
- spider.CoverAttr = spider.GetVar("spiderCoverAttr")
- spiderSleepBase := spider.GetIntVar("spiderSleepBase")
- if spiderSleepBase == -1 {
- spider.SleepBase = 1000
- } else {
- spider.SleepBase = spiderSleepBase
- }
- spiderSleepRand := spider.GetIntVar("spiderSleepRand")
- if spiderSleepRand == -1 {
- spider.SleepRand = 1000
- } else {
- spider.SleepRand = spiderSleepRand
- }
- spiderTimeout := spider.GetIntVar("spiderTimeout")
- if spiderTimeout == -1 {
- spider.Timeout = 60
- } else {
- spider.Timeout = int64(spiderTimeout)
- }
- spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
- spider.UserName = spider.GetVar("spiderUserName")
- spider.UserEmail = spider.GetVar("spiderUserEmail")
- spider.UploadTime = spider.GetVar("spiderUploadTime")
- //新增历史补漏
- //qu.Debug("-------", spider.GetBoolVar("spiderIsHistoricalMend"), spider.GetBoolVar("spiderIsMustDownload"))
- spider.IsHistoricalMend = spider.GetBoolVar("spiderIsHistoricalMend")
- spider.IsMustDownload = spider.GetBoolVar("spiderIsMustDownload")
- //新老爬虫
- spider.IsCompete = spider.GetBoolVar("spiderIsCompete")
- //爬虫类型
- spider.Infoformat = spider.GetIntVar("spiderInfoformat")
- return spider, ""
- }
- //下载量入库
- func SaveDownCount(code string, addtotal bool, todayDowncount, todayRequestNum, yesterdayDowncount, yestoDayRequestNum int32) {
- date := time.Unix(time.Now().Unix(), 0).Format(qu.Date_Short_Layout)
- updata := map[string]interface{}{}
- if addtotal {
- updata = map[string]interface{}{
- "$inc": map[string]interface{}{"totaldown": todayDowncount, "totalreq": todayRequestNum},
- "$set": map[string]interface{}{
- "yesdowncount": yesterdayDowncount,
- "yesdownreq": yestoDayRequestNum,
- "todaydowncount": todayDowncount,
- "todaydownreq": todayRequestNum,
- "date": date,
- "year": time.Now().Year(),
- "month": time.Now().Month(),
- "day": time.Now().Day(),
- },
- }
- } else {
- updata = map[string]interface{}{
- "$set": map[string]interface{}{
- "yesdowncount": yesterdayDowncount,
- "yesdownreq": yestoDayRequestNum,
- "todaydowncount": todayDowncount,
- "todaydownreq": todayRequestNum,
- "date": date,
- "year": time.Now().Year(),
- "month": time.Now().Month(),
- "day": time.Now().Day(),
- },
- }
- }
- MgoS.Update("spider_downlog", map[string]interface{}{"code": code, "date": date}, updata, true, false)
- }
- //获取下载的上下限(没用)
- func GetLimitDownload(code string) (uplimit, lowlimit int) {
- defer qu.Catch()
- ret, _ := MgoS.FindOne("spider_ldtime", map[string]interface{}{"code": code})
- if ret != nil && len(*ret) > 0 {
- uplimit = qu.IntAll((*ret)["uplimit"])
- lowlimit = qu.IntAll((*ret)["lowlimit"])
- return uplimit, lowlimit
- } else {
- return 100, 0
- }
- }
- //拼装脚本
- func GetScriptByTmp(luaconfig map[string]interface{}) string {
- defer qu.Catch()
- script := ""
- if luaconfig["listcheck"] == nil {
- luaconfig["listcheck"] = ""
- }
- if luaconfig["contentcheck"] == nil {
- luaconfig["contentcheck"] = ""
- }
- if luaconfig != nil && len(luaconfig) > 0 {
- common := luaconfig["param_common"].([]interface{})
- //新增spiderIsHistoricalMend spiderIsMustDownload
- if len(common) == 15 {
- common = append(common, "", "", "")
- } else {
- common = append(common, false, false, "", "", "")
- }
- for k, v := range common {
- if k == 4 || k == 5 || k == 6 || k == 9 || k == 10 {
- common[k] = qu.IntAll(v)
- }
- }
- script, _ = GetTmpModel(map[string][]interface{}{"common": common})
- //发布时间
- script_time := ""
- if qu.IntAll(luaconfig["type_time"]) == 0 { //向导模式
- time := luaconfig["param_time"].([]interface{})
- script_time, _ = GetTmpModel(map[string][]interface{}{
- "time": time,
- })
- } else { //专家模式
- script_time = luaconfig["str_time"].(string)
- }
- //列表页
- script_list := ""
- if qu.IntAll(luaconfig["type_list"]) == 0 { //向导模式
- list := luaconfig["param_list"].([]interface{})
- addrs := strings.Split(list[1].(string), "\n")
- if len(addrs) > 0 {
- for k, v := range addrs {
- addrs[k] = "'" + v + "'"
- }
- list[1] = strings.Join(addrs, ",")
- } else {
- list[1] = ""
- }
- script_list, _ = GetTmpModel(map[string][]interface{}{
- "list": list,
- "listcheck": []interface{}{luaconfig["listcheck"]},
- })
- } else { //专家模式
- script_list = luaconfig["str_list"].(string)
- }
- //三级页
- script_content := ""
- if qu.IntAll(luaconfig["type_content"]) == 0 { //向导模式
- content := luaconfig["param_content"].([]interface{})
- script_content, _ = GetTmpModel(map[string][]interface{}{
- "content": content,
- "contentcheck": []interface{}{luaconfig["contentcheck"]},
- })
- } else { //专家模式
- script_content = luaconfig["str_content"].(string)
- }
- script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"], luaconfig["spidercompete"], luaconfig["infoformat"])
- script += `
- ` + script_time + `
- ` + script_list + `
- ` + script_content
- script = ReplaceModel(script, common, luaconfig["model"].(map[string]interface{}))
- }
- return script
- }
- //生成爬虫脚本
- func GetTmpModel(param map[string][]interface{}) (script string, err interface{}) {
- qu.Try(func() {
- //param_common拼接
- if param != nil && param["common"] != nil {
- if len(param["common"]) < 12 {
- err = "公共参数配置不全"
- } else {
- script = fmt.Sprintf(util.Tmp_common, param["common"]...)
- }
- }
- //发布时间拼接
- if param != nil && param["time"] != nil {
- if len(param["time"]) < 3 {
- err = "方法:time-参数配置不全"
- } else {
- script += fmt.Sprintf(util.Tmp_pubtime, param["time"]...)
- }
- }
- //列表页拼接
- if param != nil && param["list"] != nil {
- if len(param["list"]) < 7 {
- err = "方法:list-参数配置不全"
- } else {
- list := []interface{}{param["listcheck"][0]}
- list = append(list, param["list"]...)
- script += fmt.Sprintf(util.Tmp_pagelist, list...)
- script = strings.Replace(script, "#pageno#", `"..tostring(pageno).."`, -1)
- }
- }
- //详情页拼接
- if param != nil && param["content"] != nil {
- if len(param["content"]) < 2 {
- err = "方法:content-参数配置不全"
- } else {
- content := []interface{}{param["contentcheck"][0]}
- content = append(content, param["content"]...)
- script += fmt.Sprintf(util.Tmp_content, content...)
- }
- }
- }, func(e interface{}) {
- err = e
- })
- return script, err
- }
- //补充模型
- func ReplaceModel(script string, comm []interface{}, model map[string]interface{}) string {
- defer qu.Catch()
- //补充通用信息
- commstr := `item["spidercode"]="` + comm[0].(string) + `";`
- commstr += `item["site"]="` + comm[1].(string) + `";`
- commstr += `item["channel"]="` + comm[2].(string) + `";`
- script = strings.Replace(script, "--Common--", commstr, -1)
- //补充模型信息
- modelstr := ""
- for k, v := range model {
- modelstr += `item["` + k + `"]="` + v.(string) + `";`
- }
- script = strings.Replace(script, "--Model--", modelstr, -1)
- return script
- }
- //爬虫信息提交编辑器(心跳)
- func SpiderInfoSend() {
- time.Sleep(15 * time.Second)
- list := []interface{}{}
- Allspiders.Range(func(key, value interface{}) bool {
- v := value.(*Spider)
- info := map[string]interface{}{}
- info["code"] = v.Code
- info["todayDowncount"] = v.TodayDowncount
- info["toDayRequestNum"] = v.ToDayRequestNum
- info["yesterdayDowncount"] = v.YesterdayDowncount
- info["yestoDayRequestNum"] = v.YestoDayRequestNum
- info["totalDowncount"] = v.TotalDowncount
- info["totalRequestNum"] = v.TotalRequestNum
- info["errorNum"] = v.ErrorNum
- info["roundCount"] = v.RoundCount
- info["runRate"] = v.SpiderRunRate
- info["lastHeartbeat"] = v.LastHeartbeat
- info["lastDowncount"] = v.LastDowncount
- info["lstate"] = v.L.Status(v.L)
- list = append(list, info)
- return true
- })
- bs, _ := json.Marshal(list)
- value := url.Values{
- "data": []string{util.Se.EncodeString(string(bs))},
- "type": []string{"info"},
- }
- _, err := http.PostForm(util.Config.Editoraddr, value)
- if err != nil {
- logger.Error("send to editor: ", err.Error())
- }
- util.TimeAfterFunc(5*time.Minute, SpiderInfoSend, TimeChan)
- }
- //保存心跳信息
- func SaveHeartInfo() {
- time.Sleep(20 * time.Minute)
- num := 0
- SpiderHeart.Range(func(key, value interface{}) bool {
- code := key.(string)
- sp, spiderOk := LoopListPath.Load(code)
- if spiderOk && sp != nil {
- heart, heartOk := value.(*Heart)
- if heartOk {
- num++
- update := []map[string]interface{}{}
- update = append(update, map[string]interface{}{"code": code})
- update = append(update, map[string]interface{}{"$set": map[string]interface{}{
- "site": heart.Site,
- "channel": heart.Channel,
- "list": heart.ListHeart,
- "findlist": heart.FindListHeart,
- "detail": heart.DetailHeart,
- "detailexecute": heart.DetailExecuteHeart,
- "modifyuser": heart.ModifyUser,
- "event": util.Config.Uploadevent,
- "updatetime": time.Now().Unix(),
- "del": false,
- }})
- UpdataHeartCache <- update
- }
- } else {
- SpiderHeart.Delete(key)
- }
- return true
- })
- logger.Info("更新心跳个数:", num)
- time.AfterFunc(20*time.Minute, SaveHeartInfo)
- }
- //保存7000节点爬虫转增量节点日志
- func SpiderCodeSendToEditor(code string) {
- defer qu.Catch()
- MgoEB.Save("luamovelog", map[string]interface{}{
- "code": code,
- "comeintime": time.Now().Unix(),
- "ok": false,
- })
- //ok := false
- //for i := 1; i <= 3; i++ {
- // logger.Info("Code:", code, " times:", i, " Send Move Event")
- // list := []interface{}{}
- // list = append(list, code)
- // bs, _ := json.Marshal(list)
- // value := url.Values{
- // "data": []string{util.Se.EncodeString(string(bs))},
- // "type": []string{"code"},
- // }
- // res, err := http.PostForm(util.Config.Editoraddr, value)
- // if err != nil {
- // logger.Error("Send To Editor For Move Event Failed,Code:", code)
- // } else {
- // if res != nil {
- // res.Body.Close()
- // }
- // ok = true
- // break
- // }
- //}
- //logger.Info("Code:", code, " Send Move Event:", ok)
- //MgoEB.Save("luamovelog", map[string]interface{}{
- // "code": code,
- // "comeintime": time.Now().Unix(),
- // "type": "sendfail",
- // "ok": ok,
- //})
- }
|