12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340 |
- package front
- import (
- "encoding/json"
- "errors"
- "fmt"
- "log"
- "sort"
- "spider"
- // "math/rand"
- mu "mfw/util"
- qu "qfw/util"
- mgdb "qfw/util/mongodb"
- mgu "qfw/util/mongodbutil"
- util "spiderutil"
- "strings"
- "time"
- u "util"
- "gopkg.in/mgo.v2/bson"
- )
// Base holds the common settings of one spider as submitted by the editor
// form. SaveStep and RunStep flatten every field except SpiderCodeOld, in
// declaration order, into the "param_common" array.
type Base struct {
	SpiderCode, SpiderCodeOld, SpiderName, SpiderChannel string
	SpiderDownDetailPage                                 bool
	SpiderStartPage, SpiderMaxPage, SpiderRunRate        int
	Spider2Collection, SpiderPageEncoding                string
	SpiderStoreMode                                      int // 1,2
	SpiderStoreToMsgEvent                                int
	SpiderTargetChannelUrl, SpiderLastDownloadTime       string
	SpiderIsHistoricalMend, SpiderIsMustDownload         bool
}
// Step1 is the form data of the "last publish time" step. SaveStep stores
// DateFormat, Address and ContentChooser as "param_time", Expert as
// "str_time" and Types as "type_time".
type Step1 struct {
	Address, ContentChooser, DateFormat string
	Expert                              string
	Types                               int
}
// Step2 is the form data of the list-page step. SaveStep stores the first
// seven fields, in declaration order, as "param_list", Expert as "str_list"
// and Types as "type_list".
type Step2 struct {
	Listadd, Listadds                         string
	BlockChooser, AddressChooser              string
	TitleChooser, DateChooser, DateFormat     string
	Expert                                    string
	Types                                     int
}
// Step3 is the form data of the detail-page step. SaveStep stores
// ContentChooser and ElementChooser as "param_content", T_title/T_href/T_date
// as "Ttitle"/"Thref"/"Tdate", Expert as "str_content" and Types as
// "type_content".
type Step3 struct {
	ContentChooser, ElementChooser string
	T_title, T_href, T_date        string
	Expert                         string
	Types                          int
}
// StepRe3 is the form data of the "copy detail page" option. SaveStep stores
// Checked as "iscopycontent" and Expert as "str_recontent".
type StepRe3 struct {
	Checked bool   // persisted as "iscopycontent"
	Expert  string // persisted as "str_recontent"
}
- //加载某个爬虫
- func (f *Front) LoadSpider(codeTaskIdReState string) error {
- tmpStr := strings.Split(codeTaskIdReState, "__")
- code := tmpStr[0]
- taskId := tmpStr[1]
- auth := qu.IntAll(f.GetSession("auth"))
- restate := -1
- if taskId == "restate=1" { //重采编辑
- restate = 1
- } else if taskId == "restate=2" {
- restate = 2
- } else if taskId == "restate=3" {
- restate = 3
- } else {
- if auth == role_dev && (f.GetSession(taskId) == nil || f.GetSession(taskId) == "") {
- xgTime := time.Unix(time.Now().Unix(), 0).Format("2006-01-02 15:04:05")
- f.SetSession(taskId, xgTime)
- }
- }
- copy := f.GetString("copy")
- if f.Method() == "GET" {
- code := util.Se.Decode4Hex(code)
- f.T["actiontext"] = "编辑"
- lua := *mgdb.FindOne("luaconfig", bson.M{"code": code})
- auth := qu.IntAll(f.GetSession("auth"))
- if lua["createuserid"].(string) == f.GetSession("userid").(string) || auth >= 1 {
- if len(lua) > 0 {
- luacopy := map[string]interface{}{}
- if copy != "" {
- luacopy = *mgdb.FindOne("luaconfig", bson.M{"code": copy})
- if len(luacopy) > 0 {
- lua["model"] = luacopy["model"]
- common_copy := luacopy["param_common"].([]interface{})
- common := lua["param_common"].([]interface{})
- common_copy[0] = common[0]
- common_copy[1] = common[1]
- common_copy[2] = common[2]
- common_copy[11] = common[11]
- lua["param_common"] = luacopy["param_common"]
- lua["param_time"] = luacopy["param_time"]
- lua["param_list"] = luacopy["param_list"]
- lua["param_content"] = luacopy["param_content"]
- lua["str_list"] = luacopy["str_list"]
- lua["str_time"] = luacopy["str_time"]
- lua["str_content"] = luacopy["str_content"]
- lua["Thref"] = luacopy["Thref"]
- lua["Tpublishtime"] = luacopy["Tpublishtime"]
- lua["Ttitle"] = luacopy["Ttitle"]
- lua["Tdate"] = luacopy["Tdate"]
- lua["type_content"] = luacopy["type_content"]
- lua["type_list"] = luacopy["type_list"]
- lua["type_time"] = luacopy["type_time"]
- }
- }
- if lua["listcheck"] != nil {
- listcheck := lua["listcheck"].(string)
- listcheck = strings.Replace(listcheck, "\\n", "\n", -1)
- listcheck = strings.Replace(listcheck, "\\", "", -1)
- lua["listcheck"] = listcheck
- }
- if lua["contentcheck"] != nil {
- contentcheck := lua["contentcheck"].(string)
- contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1)
- contentcheck = strings.Replace(contentcheck, "\\", "", -1)
- lua["contentcheck"] = contentcheck
- }
- js, _ := json.MarshalIndent(lua["model"], "", " ")
- lua["js"] = string(js)
- f.T["lua"] = lua
- f.T["taskId"] = taskId
- f.T["restate"] = restate
- f.T["isflow"] = lua["isflow"]
- f.T["spidertype"] = lua["spidertype"]
- f.T["spidermovevent"] = lua["spidermovevent"]
- f.T["spiderhistorymaxpage"] = lua["spiderhistorymaxpage"]
- if lua["oldlua"] != nil {
- return f.Render("oldedit.html", &f.T)
- }
- return f.Render("spideredit.html", &f.T)
- }
- } else {
- f.Write("您没有编辑他人脚本的权限")
- }
- }
- return nil
- }
- //查看某个爬虫
- func (f *Front) ViewSpider(id string) error {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth >= 1 {
- if f.Method() == "GET" {
- code := util.Se.Decode4Hex(id)
- f.T["actiontext"] = "编辑"
- lua := *mgdb.FindOne("luaconfig", bson.M{"code": code})
- if len(lua) > 0 {
- if lua["listcheck"] != nil {
- listcheck := lua["listcheck"].(string)
- listcheck = strings.Replace(listcheck, "\\n", "\n", -1)
- listcheck = strings.Replace(listcheck, "\\", "", -1)
- lua["listcheck"] = listcheck
- }
- if lua["contentcheck"] != nil {
- contentcheck := lua["contentcheck"].(string)
- contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1)
- contentcheck = strings.Replace(contentcheck, "\\", "", -1)
- lua["contentcheck"] = contentcheck
- }
- js, _ := json.MarshalIndent(lua["model"], "", " ")
- lua["js"] = string(js)
- f.T["lua"] = lua
- f.T["isflow"] = lua["isflow"]
- f.T["spidertype"] = lua["spidertype"]
- f.T["spidermovevent"] = lua["spidermovevent"]
- f.T["spiderhistorymaxpage"] = lua["spiderhistorymaxpage"]
- if lua["oldlua"] != nil {
- return f.Render("oldedit.html", &f.T)
- }
- return f.Render("spiderview.html", &f.T)
- } else {
- f.Write("没有对应记录!")
- return nil
- }
- }
- return f.Redirect("/center")
- } else {
- f.Write("您没有查看他人脚本的权限")
- return nil
- }
- }
- func (f *Front) LoadModel(id string) error {
- if f.Method() == "GET" {
- lua := *mgdb.Find("luaconfig", bson.M{"code": id}, nil, bson.M{"model": 1}, true, -1, -1)
- if len(lua) > 0 {
- f.ServeJson(lua[0])
- }
- }
- return f.Redirect("/center")
- }
- func (f *Front) SaveStep() {
- userid, _ := f.GetSession("userid").(string)
- auth := qu.IntAll(f.GetSession("auth"))
- rep := map[string]interface{}{}
- if f.GetString("oldlua") != "" {
- id := f.GetString("code")
- one := *mgdb.FindOne("luaconfig", bson.M{"code": id})
- id = one["code"].(string)
- script := f.GetStringComm("script")
- if strings.Index(script, id) == -1 {
- rep["msg"] = "code/名称都不能更改"
- f.ServeJson(rep)
- return
- } else {
- upset := bson.M{"luacontent": script}
- upset["modifytime"] = time.Now().Unix()
- b := mgdb.Update("luaconfig", bson.M{"code": id}, bson.M{"$set": upset}, true, false)
- if b {
- rep["msg"] = "保存成功"
- rep["code"] = util.Se.Encode2Hex(id)
- f.ServeJson(rep)
- return
- }
- }
- } else {
- if f.Base.SpiderName != "" && f.Base.SpiderCode != "" {
- code := f.Base.SpiderCode
- one := *mgdb.FindOne("luaconfig", bson.M{"code": f.Base.SpiderCode})
- state := qu.IntAllDef(one["state"], 0)
- restate := qu.IntAll(one["restate"])
- comeintime := time.Now().Unix()
- if len(one) > 0 {
- comeintime = qu.Int64All(one["comeintime"])
- ouserid := one["createuserid"].(string)
- if ouserid != userid && auth == role_dev {
- f.Write("权限不够,不能修改他人脚本")
- return
- } else {
- code = one["code"].(string)
- f.Base.SpiderCode = one["code"].(string)
- f.Base.SpiderName = (one["param_common"].([]interface{}))[1].(string)
- }
- } else {
- if auth != role_admin {
- f.Write("不能新建爬虫,请联系管理员导入")
- return
- }
- }
- listcheck := f.GetString("listcheck")
- contentcheck := f.GetString("contentcheck")
- if auth == role_dev {
- //f.Base.SpiderStoreToMsgEvent = 4002
- }
- common := []interface{}{
- f.Base.SpiderCode,
- f.Base.SpiderName,
- f.Base.SpiderChannel,
- f.Base.SpiderDownDetailPage,
- f.Base.SpiderStartPage,
- f.Base.SpiderMaxPage,
- f.Base.SpiderRunRate,
- f.Base.Spider2Collection,
- f.Base.SpiderPageEncoding,
- f.Base.SpiderStoreMode,
- f.Base.SpiderStoreToMsgEvent,
- f.Base.SpiderTargetChannelUrl,
- f.Base.SpiderLastDownloadTime,
- f.Base.SpiderIsHistoricalMend,
- f.Base.SpiderIsMustDownload,
- }
- ptime := []interface{}{
- f.Step1.DateFormat,
- f.Step1.Address,
- f.Step1.ContentChooser,
- }
- list := []interface{}{
- f.Step2.Listadd,
- f.Step2.Listadds,
- f.Step2.BlockChooser,
- f.Step2.AddressChooser,
- f.Step2.TitleChooser,
- f.Step2.DateChooser,
- f.Step2.DateFormat,
- }
- content := []interface{}{
- f.Step3.ContentChooser,
- f.Step3.ElementChooser,
- }
- param := map[string]interface{}{}
- common[4] = 1
- param["param_common"] = common
- //向导模式
- param["param_time"] = ptime
- param["param_list"] = list
- param["param_content"] = content
- param["type_time"] = f.Step1.Types
- param["type_list"] = f.Step2.Types
- param["type_content"] = f.Step3.Types
- //专家模式
- param["str_time"] = f.Step1.Expert
- param["str_list"] = f.Step2.Expert
- param["str_content"] = f.Step3.Expert
- param["comeintime"] = comeintime
- listcheck = strings.Replace(listcheck, "\n", "\\\\n", -1)
- param["listcheck"] = strings.Replace(listcheck, "\"", "\\\\\"", -1)
- contentcheck = strings.Replace(contentcheck, "\n", "\\\\n", -1)
- param["contentcheck"] = strings.Replace(contentcheck, "\"", "\\\\\"", -1)
- //补充模型
- s_model := f.GetString("model")
- configModel := util.Config.Model[s_model]
- model := map[string]interface{}{}
- for k, _ := range configModel {
- model[k] = f.GetString(k)
- }
- model["model"] = s_model
- param["code"] = f.Base.SpiderCode
- param["model"] = model
- if len(one) > 0 {
- param["createuser"] = one["createuser"]
- param["createuserid"] = one["createuserid"]
- param["code"] = one["code"]
- //开发员关联任务修改爬虫状态
- state = qu.IntAll(one["state"])
- if auth == role_dev && state >= Sp_state_3 && restate != 1 { //开发员修改,已经审核通过(不包含已上架),状态重置为待完成(restate!=1判断,重采修改保存爬虫时不修改爬虫状态)
- param["state"] = 0
- } else {
- param["state"] = state
- }
- } else {
- param["createuser"] = f.GetSession("loginuser")
- param["createuserid"] = f.GetSession("userid")
- param["createuseremail"] = f.GetSession("email")
- param["next"] = f.GetSession("email")
- param["state"] = 0
- }
- if qu.ObjToString(one["modifyuser"]) == "" {
- param["modifyuser"] = param["createuser"]
- param["modifyuserid"] = param["createuserid"]
- }
- param["modifytime"] = time.Now().Unix()
- param["Ttitle"] = f.Step3.T_title
- param["Thref"] = f.Step3.T_href
- param["Tdate"] = f.Step3.T_date
- //其他信息
- param["isflow"] = f.OtherBase.IsFlow
- param["spidertype"] = f.OtherBase.SpiderType
- param["spidermovevent"] = f.OtherBase.SpiderMoveEvent
- param["spiderhistorymaxpage"] = f.OtherBase.SpiderHistoryMaxPage
- if f.OtherBase.SpiderType == "history" { //爬虫类型是history的放到7000节点,并记录历史节点
- param["event"] = 7000
- if event := qu.IntAll(one["event"]); event != 7000 {
- param["historyevent"] = event
- }
- }
- if f.OtherBase.SpiderMoveEvent == "7700" {
- param["historyevent"] = 7700
- }
- //三级页复制
- param["str_recontent"] = f.StepRe3.Expert
- param["iscopycontent"] = f.StepRe3.Checked
- issave := spider.SaveSpider(code, param) //保存脚本
- if issave {
- for k, v := range one {
- if k != "_id" && param[k] == nil {
- param[k] = v
- }
- }
- Wlog(f.Base.SpiderName, f.Base.SpiderCode, f.GetSession("username").(string), f.GetSession("userid").(string), "修改", param)
- rep["msg"] = "保存成功"
- } else {
- rep["msg"] = "保存失败"
- }
- rep["code"] = util.Se.Encode2Hex(code)
- f.ServeJson(rep)
- }
- }
- }
- //方法测试
- func (f *Front) RunStep() {
- imodal, _ := f.GetInteger("imodal")
- script, _ := f.GetBool("script")
- listcheck := f.GetString("listcheck")
- contentcheck := f.GetString("contentcheck")
- downloadnode := f.GetString("downloadnode") //下载节点
- common := []interface{}{
- f.Base.SpiderCode,
- f.Base.SpiderName,
- f.Base.SpiderChannel,
- f.Base.SpiderDownDetailPage,
- f.Base.SpiderStartPage,
- f.Base.SpiderMaxPage,
- f.Base.SpiderRunRate,
- f.Base.Spider2Collection,
- f.Base.SpiderPageEncoding,
- f.Base.SpiderStoreMode,
- f.Base.SpiderStoreToMsgEvent,
- f.Base.SpiderTargetChannelUrl,
- f.Base.SpiderLastDownloadTime,
- f.Base.SpiderIsHistoricalMend,
- f.Base.SpiderIsMustDownload,
- "",
- "",
- "",
- }
- if f.Method() == "POST" {
- switch f.GetString("step") {
- case "step1":
- ptime := []interface{}{
- f.Step1.DateFormat,
- f.Step1.Address,
- f.Step1.ContentChooser,
- }
- if script {
- _, scripts := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal, 1)
- f.ServeJson(scripts)
- return
- }
- rs, err := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal)
- if err == nil {
- f.ServeJson(rs)
- }
- case "step2":
- addrs := strings.Split(f.Step2.Listadds, "\n")
- if len(addrs) > 0 {
- for k, v := range addrs {
- addrs[k] = "'" + v + "'"
- }
- f.Step2.Listadds = strings.Join(addrs, ",")
- } else if len(f.Step2.Listadds) > 5 {
- f.Step2.Listadds = "'" + f.Step2.Listadds + "'"
- } else {
- f.Step2.Listadds = ""
- }
- list := []interface{}{
- f.Step2.Listadd,
- f.Step2.Listadds,
- f.Step2.BlockChooser,
- f.Step2.AddressChooser,
- f.Step2.TitleChooser,
- f.Step2.DateChooser,
- f.Step2.DateFormat,
- }
- listcheck = strings.Replace(listcheck, "\n", "\\n", -1)
- listcheck = strings.Replace(listcheck, "\"", "\\\"", -1)
- s_model := f.GetString("model")
- configModel := util.Config.Model[s_model]
- model := map[string]interface{}{}
- for k, _ := range configModel {
- model[k] = f.GetString(k)
- }
- if script {
- _, script := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal, 1)
- f.ServeJson(script)
- return
- }
- rs, err := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal)
- if err == nil {
- f.ServeJson(rs)
- } else if err.(error).Error() == "no" {
- f.ServeJson(rs[0])
- }
- case "step3":
- content := []interface{}{
- f.Step3.ContentChooser,
- f.Step3.ElementChooser,
- }
- contentcheck = strings.Replace(contentcheck, "\n", "\\n", -1)
- contentcheck = strings.Replace(contentcheck, "\"", "\\\"", -1)
- data := map[string]interface{}{}
- data["title"] = f.Step3.T_title
- data["href"] = f.Step3.T_href
- data["publishtime"] = f.Step3.T_date
- if script {
- _, script := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal, 1)
- f.ServeJson(script)
- return
- }
- rs, err := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal)
- if projectinfo, ok := rs["projectinfo"].(map[string]interface{}); ok && projectinfo != nil {
- if attachments, ok := projectinfo["attachments"].(map[string]interface{}); ok && attachments != nil {
- for _, tmp := range attachments {
- tmpMap := tmp.(map[string]interface{})
- if qu.ObjToString(tmpMap["filename"]) == "附件中含有乱码" {
- rs["msg"] = "附件中含有乱码"
- }
- }
- }
- }
- if err == nil {
- f.ServeJson(rs)
- } else {
- f.ServeJson(rs["no"])
- }
- }
- }
- }
- //爬虫测试数据json
- func (f *Front) GetJson() {
- list, _ := f.GetSession("listInfo").([]map[string]interface{})
- data, _ := f.GetSession("dataInfo").(map[string]interface{})
- descript := f.GetSession("task_descript")
- remark := f.GetSession("task_remark")
- reason := f.GetSession("reason")
- username := f.GetSession("username").(string)
- msg := f.GetSession(username + "_msg")
- if len(data) > 0 {
- data["contenthtml"] = ""
- }
- for k, v := range list {
- v["a_index"] = k + 1
- }
- f.T["list"] = list
- f.T["data"] = data
- f.T["descript"] = descript
- f.T["remark"] = remark
- f.T["reason"] = reason
- f.T["msg"] = msg
- f.DelSession("listInfo")
- f.DelSession("dataInfo")
- f.DelSession("task_descript")
- f.DelSession("task_remark")
- f.DelSession("reason")
- f.Render("jsonInfo.html", &f.T)
- }
- //整体测试
- func (f *Front) SpiderPass() {
- defer mu.Catch()
- list := []map[string]interface{}{}
- data := map[string]interface{}{}
- msg1 := ""
- code := f.GetString("code")
- downloadnode := f.GetString("node")
- //根据code查询待确认任务
- query := bson.M{
- "s_code": code,
- "i_state": 3,
- }
- task := *mgdb.FindOne("task", query)
- descript := "null"
- remark := "null"
- remarktmp := []string{}
- if len(task) > 0 {
- descript = task["s_descript"].(string)
- if mrecord, ok := task["a_mrecord"].([]interface{}); ok {
- for _, m := range mrecord {
- remarkInfo := m.(map[string]interface{})
- r := remarkInfo["s_mrecord_remark"].(string)
- if r != "" {
- remarktmp = append(remarktmp, r+";")
- }
- }
- }
- }
- if len(remarktmp) > 0 {
- remark = ""
- remark = strings.Join(remarktmp, "")
- }
- f.SetSession("task_remark", remark)
- f.SetSession("task_descript", descript)
- //基本信息、方法一、方法二、方法三、总请求次数、go方法一、go方法二、go方法三、列表页条数
- steps := []interface{}{false, false, false, false, 0, 0, 0, 0, 0}
- one := *mgdb.FindOne("luaconfig", bson.M{"code": code})
- reason, _ := one["reason"].(string)
- f.SetSession("reason", reason)
- if len(one) > 0 && one["oldlua"] == nil {
- common := one["param_common"].([]interface{})
- if len(common) < 13 {
- f.ServeJson(steps)
- return
- } else {
- steps[0] = true
- }
- } else {
- steps[0] = true
- }
- script, liststr, contentstr := "", "", ""
- if one["oldlua"] == nil {
- script, liststr, contentstr = spider.GetScript(code)
- } else {
- script = one["luacontent"].(string)
- }
- if liststr != "" && contentstr != "" {
- msg1 = u.SpiderPassCheckLua(liststr, contentstr, one)
- }
- s := spider.CreateSpider(downloadnode, script)
- s.SpiderMaxPage = 1
- s.Timeout = 60
- time, timeerr := s.GetLastPublishTime()
- if timeerr == nil && len(time) > 4 {
- steps[1] = true
- list, _ = s.DownListPageItem()
- if len(list) > 0 {
- f.SetSession("listInfo", list)
- listone := list[0]
- if len(qu.ObjToString(listone["href"])) < 7 ||
- (qu.ObjToString(listone["publishtime"]) != "0" && len(qu.ObjToString(listone["publishtime"])) < 5) ||
- len(qu.ObjToString(listone["title"])) < 3 {
- f.ServeJson(steps)
- return
- } else {
- steps[2] = true
- if s.DownDetail {
- param := map[string]string{}
- index := 0
- if len(list) > 0 {
- steps[8] = len(list)
- index = len(list) / 2
- for k, v := range list[index] {
- param[k] = qu.ObjToString(v)
- }
- data = map[string]interface{}{}
- s.DownloadDetailPage(param, data)
- if len(data) > 0 {
- f.SetSession("dataInfo", data)
- } else {
- f.SetSession("dataInfo", "")
- }
- if len(data) == 0 || data["detail"].(string) == "" {
- steps[3] = false
- } else {
- steps[3] = true
- }
- }
- } else {
- steps[3] = true
- }
- }
- } else {
- f.SetSession("listInfo", "")
- f.SetSession("dataInfo", "")
- }
- }
- //关闭laustate
- s.L.Close()
- steps[4] = s.Test_luareqcount
- steps[5] = s.Test_goreqtime
- steps[6] = s.Test_goreqlist
- steps[7] = s.Test_goreqcon
- //校验
- msg := u.SpiderPassCheckListAndDetail(list, data)
- if msg1 != "" {
- msg = msg1 + "," + msg
- }
- username := f.GetSession("username").(string)
- f.SetSession(username+"_msg", msg)
- f.ServeJson(steps)
- }
- func (f *Front) DownSpider(id string) {
- //auth := qu.IntAll(f.GetSession("auth"))
- //if auth > role_dev {
- one := *mgdb.FindOne("luaconfig", bson.M{"code": id})
- script := ""
- filename := id + ".lua"
- if len(one) > 0 {
- if one["oldlua"] != nil {
- if one["luacontent"] != nil {
- script = one["luacontent"].(string)
- }
- } else {
- user := *mgdb.FindOne("user", bson.M{"_id": bson.ObjectIdHex(one["createuserid"].(string))})
- name := one["createuser"]
- email := user["s_email"]
- upload := time.Now().Format("2006-01-02 15:04:05")
- script, _, _ = spider.GetScript(id, name, email, upload)
- }
- }
- f.ResponseWriter.Header().Del("Content-Type")
- f.ResponseWriter.Header().Add("Content-Type", "application/x-download")
- f.ResponseWriter.Header().Add("Content-Disposition", "attachment;filename=spider_"+filename)
- f.WriteBytes([]byte(script))
- // } else {
- // f.Write("您没有权限")
- // }
- }
- //更新爬虫状态
- func (f *Front) UpState() error {
- username := f.GetSession("username").(string)
- code := f.GetString("code")
- state, _ := f.GetInt("state")
- id := f.GetString("taskId")
- reason := f.GetString("reason")
- auth := qu.IntAll(f.GetSession("auth"))
- var codeArr = []string{code}
- var taskid []string
- //修改任务状态
- istotask := false
- res := map[string]interface{}{
- "istotask": istotask,
- "err": "没有权限",
- "code": util.Se.Encode2Hex(code),
- "taskid": taskid,
- }
- var xgTime int64
- if f.GetSession(id) == nil || f.GetSession(id) == "" {
- xgTime = time.Now().Unix()
- } else {
- xgTimeStr := qu.ObjToString(f.GetSession(id))
- xgTimeTmp, _ := time.ParseInLocation("2006-01-02 15:04:05", xgTimeStr, time.Local)
- xgTime = xgTimeTmp.Unix()
- }
- f.DelSession(id)
- if IsHasUpState(auth, int(state)) {
- b, err := UpStateAndUpSpider(code, "", reason, username, int(state)) //更新爬虫状态
- if b && state == Sp_state_1 { //提交审核
- //有对应任务跳转提交记录页
- taskid = checkTask(codeArr, 1)
- if len(taskid) > 0 {
- res["istotask"] = true
- res["taskid"] = taskid[0]
- }
- } else if b && state == Sp_state_2 { //打回
- taskid = checkTask(codeArr, 2)
- if len(taskid) > 0 {
- //UpTaskState([]string{taskid}, 2) //修改状态
- UpTaskState(taskid, 2, "", int64(0)) //修改任务状态
- SaveRemark(taskid, reason, username) //保存记录信息
- }
- } else if b && state == Sp_state_3 { //审核通过
- taskid = checkTask(codeArr, 3)
- if len(taskid) > 0 {
- //UpTaskState([]string{taskid}, 3)
- UpTaskState(taskid, 3, "", int64(0))
- SaveRemark(taskid, "", username)
- }
- } else if b && state == Sp_state_6 { //下架
- //下架成功删除download数据
- flag := delDownloadData(code)
- log.Println(code, "---下架删除download数据:", flag)
- } else if b && state == Sp_state_7 { //反馈
- taskid = checkTask(codeArr, 7)
- if len(taskid) > 0 {
- UpTaskState(taskid, 7, reason, xgTime)
- }
- }
- if err != nil {
- res["err"] = err.Error()
- f.ServeJson(res)
- } else {
- res["err"] = ""
- f.ServeJson(res)
- }
- } else {
- f.ServeJson(res)
- }
- return nil
- }
- //下架删除download数据
- func delDownloadData(code string) bool {
- return mgu.Del("download", "spider", "spider", `{"code":"`+code+`"}`)
- }
- //批量作废删除download数据
- func disableDelDownloadData(code []string) {
- for _, v := range code {
- flag := delDownloadData(v)
- log.Println(code, "---批量删除download数据:", flag)
- }
- }
- //爬虫核对
- func (f *Front) Checktime() {
- code := f.GetString("code")
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != role_admin {
- f.ServeJson(false)
- } else {
- b := mgdb.Update("luaconfig", `{"code":"`+code+`"}`, `{"$set":{"l_checktime":`+fmt.Sprint(time.Now().Unix())+`}}`, true, false)
- f.ServeJson(b)
- }
- }
- //批量作废
- func (f *Front) Disables() error {
- auth := qu.IntAll(f.GetSession("auth"))
- names := strings.Split(f.GetString("names"), ",")
- ids := strings.Split(f.GetString("ids"), ",")
- codes := strings.Split(f.GetString("codes"), ",")
- disablereason := f.GetString("disablereason")
- res := ""
- if IsHasUpState(auth, Sp_state_4) {
- for k, id := range ids {
- b, err := UpStateAndUpSpider("", id, disablereason, "", Sp_state_4)
- if b { //作废成功
- //修改任务状态
- UpTaskState(codes, 4, "", int64(0))
- //删除download表数据
- //go disableDelDownloadData(codes)
- if err != nil {
- res = res + names[k] + ",ok" + qu.ObjToString(err.Error()) + ";"
- } else {
- res = res + names[k] + ",ok" + ";"
- }
- } else {
- res = res + names[k] + "," + qu.ObjToString(err.Error()) + ";"
- }
- }
- } else {
- res = "没有权限"
- }
- f.ServeJson(res)
- return nil
- }
- //批量上下架
- func (f *Front) BatchShelves() {
- codes := strings.Split(f.GetString("codes"), ",")
- state, _ := f.GetInteger("state")
- auth := qu.IntAll(f.GetSession("auth"))
- errCode := []string{}
- var err error
- b := false
- if IsHasUpState(auth, Sp_state_5) {
- if state == 5 { //批量上架
- for _, code := range codes {
- _, err = UpStateAndUpSpider(code, "", "", "", Sp_state_5)
- if err != nil {
- errCode = append(errCode, code)
- }
- }
- } else { //批量下架
- for _, code := range codes {
- b, err = UpStateAndUpSpider(code, "", "", "", Sp_state_6)
- if err != nil {
- errCode = append(errCode, code)
- }
- //下架删除download数据
- if b {
- flag := delDownloadData(code)
- log.Println(code, "---删除download数据:", flag)
- }
- }
- }
- } else {
- errCode = append(errCode, "没有权限")
- }
- f.ServeJson(errCode)
- }
- //更新爬虫状态,并判断是否更新节点爬虫
- func UpStateAndUpSpider(code, id, reason, username string, state int) (bool, error) {
- upresult := false
- var err error
- one := map[string]interface{}{}
- if code != "" {
- one = *mgdb.FindOne("luaconfig", bson.M{"code": code})
- } else {
- one = *mgdb.FindOne("luaconfig", bson.M{"_id": bson.ObjectIdHex(id)})
- code = one["code"].(string)
- }
- if len(one) > 0 {
- var event int
- if one["event"] != nil {
- event = qu.IntAll(one["event"])
- } else {
- for k, _ := range util.Config.Uploadevents { //?
- event = qu.IntAll(k)
- break
- }
- //r := rand.New(rand.NewSource(time.Now().UnixNano()))
- //event = util.Config.Uploadevents[r.Intn(len(util.Config.Uploadevents))]
- }
- //oldstate := qu.IntAll(one["state"])
- switch state {
- case Sp_state_4: //作废
- // if oldstate == Sp_state_5 {
- // upresult = false
- // err = errors.New("已上架不允许作废")
- // } else {
- // upresult = true
- // }
- upresult, err = spider.UpdateSpiderByCodeState(code, "6", event) //下架
- case Sp_state_5, Sp_state_6: //上下架
- upresult, err = spider.UpdateSpiderByCodeState(code, fmt.Sprint(state), event)
- //log.Println(upresult, err)
- default:
- upresult = true
- err = nil
- }
- if err != nil && strings.Contains(err.Error(), "timeout") {
- err = errors.New("连接节点" + fmt.Sprint(event) + "超时")
- upresult = true
- }
- if upresult && err == nil {
- upset := bson.M{"state": state} //修改状态
- if one["oldlua"] != nil { //老脚本上传
- upresult = mgdb.Update("luaconfig", bson.M{"code": code}, bson.M{"$set": upset}, true, false)
- } else {
- if state == Sp_state_1 { //提交审核
- upset["l_complete"] = time.Now().Unix()
- upset["report"] = ""
- } else if state == Sp_state_3 { //发布
- if one["event"] == nil {
- upset["event"] = event
- upset["modifytime"] = time.Now().Unix()
- }
- upset["l_uploadtime"] = time.Now().Unix()
- } else if state == Sp_state_2 { //打回原因
- upset["reason"] = reason
- } else if state == Sp_state_7 { //反馈问题
- upset["report"] = reason
- upset["state"] = 1 //反馈后爬虫改为待审核
- } else if state == Sp_state_5 { //上架,核对时间重置
- upset["l_checktime"] = 0
- } else if state == Sp_state_4 { //作废,作废原因
- upset["disablereason"] = reason
- upset["modifytime"] = time.Now().Unix()
- }
- upresult = mgdb.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, false, false)
- if state == Sp_state_1 { //提交审核,验证是否提交成功
- for i := 1; i <= 5; i++ { //解决提交不上,重试5次
- lua := *mgdb.FindOne("luaconfig", map[string]interface{}{"code": code})
- tmpState := qu.IntAll(lua["state"])
- if state == tmpState {
- break
- } else {
- upresult = mgdb.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, false, false)
- upresult = false
- }
- }
- }
- qu.Debug("提交日志:", code, upset, upresult)
- if upresult && (state == Sp_state_2 || state == Sp_state_3) { //打回、审核记录日志
- types := "打回"
- if state == Sp_state_3 {
- types = "审核"
- }
- obj := bson.M{
- "code": code,
- "auditor": username,
- "types": types,
- "comeintime": time.Now().Unix(),
- "reason": reason,
- "spideruser": one["createuser"],
- "modifytime": one["modifytime"],
- }
- mgdb.Save("lua_logs_auditor", obj)
- }
- }
- }
- }
- return upresult, err
- }
- //保存记录信息
- func SaveRemark(taskid []string, reason, username string) {
- timeNow := time.Now().Unix()
- if reason == "" {
- reason = "审核通过"
- }
- for _, id := range taskid {
- query := bson.M{
- "_id": bson.ObjectIdHex(string(id)),
- }
- task := *mgdb.FindOne("task", query)
- if task != nil {
- checkData := task["a_check"]
- var checkArr []map[string]interface{}
- newData := make(map[string]interface{})
- newData["s_check_checkUser"] = username
- newData["l_check_checkTime"] = timeNow
- newData["s_check_checkRemark"] = reason
- if checkData != nil {
- myArr := qu.ObjArrToMapArr(checkData.([]interface{}))
- if myArr != nil && len(myArr) > 0 {
- for _, v := range myArr {
- checkArr = append(checkArr, v)
- }
- }
- }
- checkArr = append(checkArr, newData)
- task["a_check"] = checkArr
- mgdb.Update("task", query, map[string]interface{}{
- "$set": task,
- }, false, false)
- }
- }
- }
- //修改任务状态
- func UpTaskState(code []string, num int, reason string, startTime int64) {
- query := bson.M{}
- update := bson.M{}
- for _, v := range code {
- if num == 1 || num == 2 || num == 3 || num == 7 {
- query = bson.M{
- "_id": bson.ObjectIdHex(v),
- }
- } else {
- query = bson.M{
- "s_code": v,
- }
- }
- if num == 1 { //提交审核
- update = bson.M{
- "$set": bson.M{
- "i_state": 3,
- },
- }
- } else if num == 2 { //打回 -->未通过
- update = bson.M{
- "$set": bson.M{
- "i_state": 5,
- },
- }
- } else if num == 3 { //发布(审核通过) -->审核通过
- update = bson.M{
- "$set": bson.M{
- "i_state": 4,
- },
- }
- } else if num == 4 { //批量作废 -->关闭
- update = bson.M{
- "$set": bson.M{
- "i_state": 6,
- "l_complete": time.Now().Unix(),
- },
- }
- } else if num == 7 { //反馈信息 -->待审核
- newData := map[string]interface{}{
- "l_mrecord_comeintime": startTime,
- "l_mrecord_complete": time.Now().Unix(),
- "s_mrecord_remark": reason,
- }
- mrecord := []interface{}{}
- mrecord = append(mrecord, newData)
- update = bson.M{
- "$set": bson.M{
- "i_state": 3,
- "l_complete": time.Now().Unix(),
- "a_mrecord": mrecord,
- },
- }
- }
- flag := mgdb.Update("task", query, update, false, true)
- log.Println("codeOrId:", query, " 修改任务状态:", flag)
- }
- }
- //更新节点
- func (f *Front) ChangeEvent() {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != role_admin {
- f.ServeJson("没有权限")
- }
- code := f.GetString("code")
- event, _ := f.GetInt("event")
- eventok := false
- for k, _ := range util.Config.Uploadevents {
- if event == qu.Int64All(k) {
- eventok = true
- break
- }
- }
- if !eventok {
- f.ServeJson("没有对应节点")
- return
- }
- info := *mgdb.FindOne("luaconfig", `{"code":"`+code+`"}`)
- if len(info) > 0 {
- oldevent := qu.IntAll(info["event"])
- if qu.IntAll(info["state"]) == Sp_state_5 {
- //源节点下架
- _, err := spider.UpdateSpiderByCodeState(code, fmt.Sprint(Sp_state_6), oldevent)
- set := map[string]interface{}{
- "$set": map[string]interface{}{
- "event": qu.IntAll(event),
- "state": Sp_state_6,
- },
- }
- mgdb.Update("luaconfig", `{"code":"`+code+`"}`, set, true, false)
- if err != nil && strings.Contains(err.Error(), "timeout") {
- f.ServeJson("连接节点" + fmt.Sprint(oldevent) + "超时")
- } else {
- f.ServeJson(err.Error())
- }
- } else {
- set := map[string]interface{}{
- "$set": map[string]interface{}{
- "event": qu.IntAll(event),
- },
- }
- mgdb.Update("luaconfig", `{"code":"`+code+`"}`, set, true, false)
- }
- } else {
- f.ServeJson("没有对应记录")
- }
- }
- //验证用户是否有更改状态权限
- func IsHasUpState(auth, state int) bool {
- rep := false
- switch auth {
- case role_dev:
- if state == Sp_state_1 || state == Sp_state_7 {
- rep = true
- }
- case role_examine:
- if state == Sp_state_2 || state == Sp_state_3 {
- rep = true
- }
- case role_admin:
- rep = true
- default:
- }
- return rep
- }
- var list_fields = `{"_id":1,"code":1,"createuser":1,"modifyuser":1,"modifytime":1,"l_uploadtime":1,"l_checktime":1,"state":1,"param_common":1,"event":1,"urgency":1,"platform":1}`
- //脚本管理,结合爬虫运行信息
- func (f *Front) LuaList() {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != role_admin {
- f.ServeJson("没有权限!")
- return
- }
- if f.Method() == "POST" {
- state, _ := f.GetInteger("state")
- event, _ := f.GetInteger("event")
- start, _ := f.GetInteger("start")
- limit, _ := f.GetInteger("length")
- draw, _ := f.GetInteger("draw")
- searchStr := f.GetString("search[value]")
- //search := strings.Replace(searchStr, " ", "", -1)
- search := strings.TrimSpace(searchStr)
- query := bson.M{}
- q1 := bson.M{}
- q1["$or"] = []interface{}{
- bson.M{"code": bson.M{"$regex": search}},
- bson.M{"createuser": bson.M{"$regex": search}},
- bson.M{"param_common.1": bson.M{"$regex": search}},
- }
- q2 := bson.M{}
- if state > -1 {
- q2 = bson.M{"state": state}
- } else {
- q2["$or"] = []interface{}{
- bson.M{"state": Sp_state_3},
- bson.M{"state": Sp_state_5},
- bson.M{"state": Sp_state_6},
- }
- }
- q3 := bson.M{}
- if event > -1 {
- q3 = bson.M{"event": event}
- }
- if search != "" {
- query["$and"] = []interface{}{q1, q2, q3}
- } else {
- query["$and"] = []interface{}{q2, q3}
- }
- sort := `{"%s":%d}`
- orderIndex := f.GetString("order[0][column]")
- orderName := f.GetString(fmt.Sprintf("columns[%s][data]", orderIndex))
- orderType := 1
- if f.GetString("order[0][dir]") != "asc" {
- orderType = -1
- }
- sort = fmt.Sprintf(sort, orderName, orderType)
- page := start / 10
- luas := *mgdb.Find("luaconfig", query, sort, list_fields, false, start, limit)
- count := mgdb.Count("luaconfig", query)
- for k, v := range luas {
- v["num"] = k + 1 + page*10
- l_uploadtime := qu.Int64All(v["l_uploadtime"])
- v["l_uploadtime"] = qu.FormatDateByInt64(&l_uploadtime, qu.Date_Full_Layout)
- l_checktime := qu.Int64All(v["l_checktime"])
- v["l_checktime"] = qu.FormatDateByInt64(&l_checktime, qu.Date_Full_Layout)
- if l_checktime > 0 { //核对
- v["is_check"] = true
- } else { //未核对
- v["is_check"] = false
- }
- if tmp, ok := spinfos.Load(v["code"]); ok {
- info := tmp.(*spinfo)
- v["modifytime"] = info.lastHeartbeat
- v["yesterday"] = fmt.Sprint(info.yesterdayDowncount) + "/" + fmt.Sprint(info.yestoDayRequestNum)
- v["terday"] = fmt.Sprint(info.todayDowncount) + "/" + fmt.Sprint(info.toDayRequestNum)
- v["lastdowncount"] = info.lastDowncount
- v["lstate"] = info.lstate
- } else {
- v["modifytime"] = ""
- v["yesterday"] = ""
- v["terday"] = ""
- v["lastdowncount"] = 0
- v["lstate"] = ""
- }
- }
- f.ServeJson(map[string]interface{}{"draw": draw, "data": luas, "recordsFiltered": count, "recordsTotal": count})
- } else {
- events := []string{}
- for k, _ := range util.Config.Uploadevents {
- events = append(events, k)
- }
- sort.Strings(events)
- f.T["events"] = events
- f.Render("lualist.html", &f.T)
- }
- }
// spinfo holds the runtime information reported for a single spider.
type spinfo struct {
	// code identifies the spider; it is the key under which the value is
	// stored in spinfos.
	code string

	// Download and request counters for today.
	todayDowncount  int
	toDayRequestNum int

	// Download and request counters for yesterday.
	yesterdayDowncount int
	yestoDayRequestNum int

	// Download and request counters in total.
	totalDowncount  int
	totalRequestNum int

	errorNum   int
	roundCount int
	runRate    int

	lastDowncount int
	// lastHeartbeat is kept as an already formatted date string.
	lastHeartbeat string
	lstate        string
}
- //爬虫信息
- func SpiderInfo(data string) {
- data = util.Se.DecodeString(data)
- infos := []map[string]interface{}{}
- err := json.Unmarshal([]byte(data), &infos)
- if err != nil {
- return
- }
- for _, tmp := range infos {
- lastHeartbeat := qu.Int64All(tmp["lastHeartbeat"])
- info := &spinfo{
- code: fmt.Sprint(tmp["code"]),
- todayDowncount: qu.IntAll(tmp["todayDowncount"]),
- toDayRequestNum: qu.IntAll(tmp["toDayRequestNum"]),
- yesterdayDowncount: qu.IntAll(tmp["yesterdayDowncount"]),
- yestoDayRequestNum: qu.IntAll(tmp["yestoDayRequestNum"]),
- totalDowncount: qu.IntAll(tmp["totalDowncount"]),
- totalRequestNum: qu.IntAll(tmp["totalRequestNum"]),
- errorNum: qu.IntAll(tmp["errorNum"]),
- roundCount: qu.IntAll(tmp["roundCount"]),
- runRate: qu.IntAll(tmp["runRate"]),
- lastHeartbeat: qu.FormatDateByInt64(&lastHeartbeat, qu.Date_Full_Layout),
- lastDowncount: qu.IntAll(tmp["lastDowncount"]),
- lstate: fmt.Sprint(tmp["lstate"]),
- }
- spinfos.Store(info.code, info)
- //log.Println(info)
- }
- }
- //接受维护任务信息
- func SpiderModifyTask(data string) {
- data = util.Se.DecodeString(data)
- mtasks := []map[string]interface{}{}
- err := json.Unmarshal([]byte(data), &mtasks)
- if err != nil {
- return
- }
- for k, tmp := range mtasks {
- log.Println(k, tmp)
- }
- }
- //查看是否有该任务
- func checkTask(codes []string, num int) []string {
- // var id string = ""
- query := bson.M{}
- var idArr []string
- if len(codes) > 0 {
- for _, v := range codes {
- if num == 1 {
- query = bson.M{
- "s_code": v,
- "i_state": bson.M{
- "$in": []int{1, 2, 5},
- },
- }
- } else if num == 2 { //打回时查询待审核的任务
- query = bson.M{
- "s_code": v,
- "i_state": 3,
- }
- } else if num == 3 { //审核通过时查询待处理、处理中、待审核、未通过的任务
- query = bson.M{
- "s_code": v,
- "i_state": bson.M{
- "$in": []int{1, 2, 3, 5},
- },
- }
- } else if num == 7 {
- query = bson.M{
- "s_code": v,
- "i_state": bson.M{
- "$in": []int{2, 5},
- },
- }
- }
- //task := *mgdb.FindOne("task", query)
- task := *mgdb.Find("task", query, nil, nil, false, -1, -1)
- if task != nil {
- for _, v := range task {
- //id = v["_id"].(bson.ObjectId).Hex()
- idArr = append(idArr, v["_id"].(bson.ObjectId).Hex())
- }
- }
- return idArr
- }
- }
- return idArr
- }
|