12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801 |
- package front
- import (
- "encoding/json"
- "errors"
- "fmt"
- "log"
- "mongodb"
- "regexp"
- "sort"
- "spider"
- "strconv"
- mu "mfw/util"
- qu "qfw/util"
- util "spiderutil"
- "strings"
- "time"
- u "util"
- )
// Base holds the general settings of a spider as submitted from the edit form.
//
// SaveStep and RunStep pack every field except SpiderCodeOld, in declaration
// order, into the "param_common" slice, so the declaration order here is
// significant and must not be changed.
type Base struct {
	SpiderCode, SpiderCodeOld, SpiderName, SpiderChannel string
	SpiderDownDetailPage                                 bool
	SpiderStartPage, SpiderMaxPage, SpiderRunRate        int
	Spider2Collection, SpiderPageEncoding                string
	SpiderStoreMode                                      int // original note: 1,2
	SpiderStoreToMsgEvent                                int
	SpiderTargetChannelUrl, SpiderLastDownloadTime       string
	SpiderIsHistoricalMend, SpiderIsMustDownload         bool
}
// Step1 carries the form values of the "publish time" step.
//
// SaveStep stores DateFormat, Address and ContentChooser as "param_time",
// Expert as "str_time" and Types as "type_time".
type Step1 struct {
	Address, ContentChooser, DateFormat, Expert string
	Types                                       int
}
// Step2 carries the form values of the "list page" step.
//
// SaveStep stores the first seven fields, in declaration order, as
// "param_list", Expert as "str_list" and Types as "type_list".
type Step2 struct {
	Listadd, Listadds                         string
	BlockChooser, AddressChooser              string
	TitleChooser, DateChooser, DateFormat     string
	Expert                                    string
	Types                                     int
}
// Step3 carries the form values of the "detail page" step.
//
// SaveStep stores ContentChooser and ElementChooser as "param_content",
// T_title/T_href/T_date as "Ttitle"/"Thref"/"Tdate", Expert as "str_content"
// and Types as "type_content".
type Step3 struct {
	ContentChooser, ElementChooser string
	T_title, T_href, T_date        string
	Expert                         string
	Types                          int
}
// StepRe3 carries the "copy detail page" form values.
//
// SaveStep stores Checked as "iscopycontent" and Expert as "str_recontent".
type StepRe3 struct {
	Checked bool   // persisted as "iscopycontent"
	Expert  string // persisted as "str_recontent"
}
// OtherBase holds the additional spider settings submitted with the edit form.
type OtherBase struct {
	// IsFlow flags whether the data collected by the spider takes part in
	// the data flow.
	IsFlow int
	// SpiderType is the spider type: "increment" or "history".
	SpiderType string
	// SpiderHistoryMaxPage is the maximum page to collect when collecting
	// historical data.
	SpiderHistoryMaxPage int
	// SpiderMoveEvent is the node the spider moves to after the history run
	// (original note: comm = queue mode, bid = high-performance mode).
	SpiderMoveEvent string
}
- //加载某个爬虫
- func (f *Front) LoadSpider(codeTaskIdReState string) error {
- tmpStr := strings.Split(codeTaskIdReState, "__")
- code := tmpStr[0]
- taskId := tmpStr[1]
- auth := qu.IntAll(f.GetSession("auth"))
- restate := -1
- if taskId == "restate=1" { //重采编辑
- restate = 1
- } else if taskId == "restate=2" {
- restate = 2
- } else if taskId == "restate=3" {
- restate = 3
- } else {
- if auth == u.Role_Dev && qu.ObjToString(f.GetSession(taskId)) == "" {
- xgTime := time.Unix(time.Now().Unix(), 0).Format("2006-01-02 15:04:05")
- f.SetSession(taskId, xgTime)
- }
- }
- copy := f.GetString("copy")
- if f.Method() == "GET" {
- code := util.Se.Decode4Hex(code)
- f.T["actiontext"] = "编辑"
- //lua, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
- lua, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- auth := qu.IntAll(f.GetSession("auth"))
- if qu.ObjToString((*lua)["createuserid"]) == f.GetSession("userid").(string) || auth >= 1 {
- if len(*lua) > 0 {
- if qu.IntAll((*lua)["event"]) == 7000 && qu.IntAll((*lua)["urgency"]) == 0 && qu.IntAll((*lua)["state"]) == 0 {
- q := map[string]interface{}{
- "event": 7000,
- "state": 0,
- "urgency": 1,
- "modifyuserid": f.GetSession("userid"),
- }
- if u.MgoEB.Count("luaconfig", q) > 0 {
- f.Write("名下还有7000节点待完成的紧急爬虫,暂无法处理该爬虫!")
- return nil
- }
- }
- if copy != "" {
- //luacopy, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": copy})
- luacopy, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": copy})
- if len(*luacopy) > 0 {
- (*lua)["model"] = (*luacopy)["model"]
- common_copy := (*luacopy)["param_common"].([]interface{})
- common := (*lua)["param_common"].([]interface{})
- common_copy[0] = common[0]
- common_copy[1] = common[1]
- common_copy[2] = common[2]
- common_copy[11] = common[11]
- (*lua)["param_common"] = (*luacopy)["param_common"]
- (*lua)["param_time"] = (*luacopy)["param_time"]
- (*lua)["param_list"] = (*luacopy)["param_list"]
- (*lua)["param_content"] = (*luacopy)["param_content"]
- (*lua)["str_list"] = (*luacopy)["str_list"]
- (*lua)["str_time"] = (*luacopy)["str_time"]
- (*lua)["str_content"] = (*luacopy)["str_content"]
- (*lua)["Thref"] = (*luacopy)["Thref"]
- (*lua)["Tpublishtime"] = (*luacopy)["Tpublishtime"]
- (*lua)["Ttitle"] = (*luacopy)["Ttitle"]
- (*lua)["Tdate"] = (*luacopy)["Tdate"]
- (*lua)["type_content"] = (*luacopy)["type_content"]
- (*lua)["type_list"] = (*luacopy)["type_list"]
- (*lua)["type_time"] = (*luacopy)["type_time"]
- }
- }
- if (*lua)["listcheck"] != nil {
- listcheck := (*lua)["listcheck"].(string)
- listcheck = strings.Replace(listcheck, "\\n", "\n", -1)
- listcheck = strings.Replace(listcheck, "\\", "", -1)
- (*lua)["listcheck"] = listcheck
- }
- if (*lua)["contentcheck"] != nil {
- contentcheck := (*lua)["contentcheck"].(string)
- contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1)
- contentcheck = strings.Replace(contentcheck, "\\", "", -1)
- (*lua)["contentcheck"] = contentcheck
- }
- js, _ := json.MarshalIndent((*lua)["model"], "", " ")
- (*lua)["js"] = string(js)
- f.T["lua"] = lua
- f.T["taskId"] = taskId
- f.T["restate"] = restate
- f.T["isflow"] = (*lua)["isflow"]
- f.T["spidertype"] = (*lua)["spidertype"]
- f.T["spidermovevent"] = (*lua)["spidermovevent"]
- f.T["spiderhistorymaxpage"] = (*lua)["spiderhistorymaxpage"]
- events := []string{}
- for k, _ := range util.Config.Uploadevents {
- events = append(events, k)
- }
- sort.Strings(events)
- f.T["events"] = events
- if (*lua)["oldlua"] != nil {
- return f.Render("oldedit.html", &f.T)
- }
- return f.Render("spideredit.html", &f.T)
- }
- } else {
- f.Write("您没有编辑他人脚本的权限")
- }
- }
- return nil
- }
- //查看某个爬虫
- func (f *Front) ViewSpider(id string) error {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth >= 1 {
- if f.Method() == "GET" {
- code := util.Se.Decode4Hex(id)
- f.T["actiontext"] = "编辑"
- //lua, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
- lua, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- if len(*lua) > 0 {
- if (*lua)["listcheck"] != nil {
- listcheck := (*lua)["listcheck"].(string)
- listcheck = strings.Replace(listcheck, "\\n", "\n", -1)
- listcheck = strings.Replace(listcheck, "\\", "", -1)
- (*lua)["listcheck"] = listcheck
- }
- if (*lua)["contentcheck"] != nil {
- contentcheck := (*lua)["contentcheck"].(string)
- contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1)
- contentcheck = strings.Replace(contentcheck, "\\", "", -1)
- (*lua)["contentcheck"] = contentcheck
- }
- js, _ := json.MarshalIndent((*lua)["model"], "", " ")
- (*lua)["js"] = string(js)
- f.T["lua"] = lua
- f.T["isflow"] = (*lua)["isflow"]
- f.T["spidertype"] = (*lua)["spidertype"]
- f.T["spidermovevent"] = (*lua)["spidermovevent"]
- f.T["spiderhistorymaxpage"] = (*lua)["spiderhistorymaxpage"]
- events := []string{}
- for k, _ := range util.Config.Uploadevents {
- events = append(events, k)
- }
- sort.Strings(events)
- f.T["events"] = events
- if (*lua)["oldlua"] != nil {
- return f.Render("oldedit.html", &f.T)
- }
- return f.Render("spiderview.html", &f.T)
- } else {
- f.Write("没有对应记录!")
- return nil
- }
- }
- return f.Redirect("/center")
- } else {
- f.Write("您没有查看他人脚本的权限")
- return nil
- }
- }
- func (f *Front) LoadModel(id string) error {
- if f.Method() == "GET" {
- //lua, _ := u.MgoE.Find("luaconfig", map[string]interface{}{"code": id}, nil, map[string]interface{}{"model": 1}, true, -1, -1)
- lua, _ := u.MgoEB.Find("luaconfig", map[string]interface{}{"code": id}, nil, map[string]interface{}{"model": 1}, true, -1, -1)
- if len(*lua) > 0 {
- f.ServeJson((*lua)[0])
- }
- }
- return f.Redirect("/center")
- }
- func (f *Front) SaveStep() {
- userid, _ := f.GetSession("userid").(string)
- auth := qu.IntAll(f.GetSession("auth"))
- rep := map[string]interface{}{}
- if f.GetString("oldlua") != "" {
- id := f.GetString("code")
- //one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": id})
- one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": id})
- id = qu.ObjToString((*one)["code"])
- script := f.GetStringComm("script")
- if strings.Index(script, id) == -1 {
- rep["msg"] = "code/名称都不能更改"
- f.ServeJson(rep)
- return
- } else {
- upset := map[string]interface{}{"luacontent": script}
- upset["modifytime"] = time.Now().Unix()
- //b := u.MgoE.Update("luaconfig", map[string]interface{}{"code": id}, map[string]interface{}{"$set": upset}, true, false)
- b := u.MgoEB.Update("luaconfig", map[string]interface{}{"code": id}, map[string]interface{}{"$set": upset}, true, false)
- if b {
- rep["msg"] = "保存成功"
- rep["code"] = util.Se.Encode2Hex(id)
- f.ServeJson(rep)
- return
- }
- }
- } else {
- if f.Base.SpiderName != "" && f.Base.SpiderCode != "" {
- code := f.Base.SpiderCode
- //one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": f.Base.SpiderCode})
- one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": f.Base.SpiderCode})
- //记录上架操作前的第一次保存时的爬虫历史
- user := f.GetSession("username").(string)
- LuaSaveLog(f.Base.SpiderCode, user, one, 0)
- state := qu.IntAllDef((*one)["state"], 0)
- restate := qu.IntAll((*one)["restate"])
- infoformat := qu.IntAll((*one)["infoformat"])
- comeintime := time.Now().Unix()
- if len((*one)) > 0 {
- comeintime = qu.Int64All((*one)["comeintime"])
- ouserid := qu.ObjToString((*one)["createuserid"])
- if ouserid != userid && auth == u.Role_Dev {
- f.Write("权限不够,不能修改他人脚本")
- return
- } else {
- code = qu.ObjToString((*one)["code"])
- f.Base.SpiderCode = code
- f.Base.SpiderName = ((*one)["param_common"].([]interface{}))[1].(string)
- }
- } else {
- if auth != u.Role_Admin {
- f.Write("不能新建爬虫,请联系管理员导入")
- return
- }
- }
- listcheck := f.GetString("listcheck")
- contentcheck := f.GetString("contentcheck")
- if auth == u.Role_Dev {
- //f.Base.SpiderStoreToMsgEvent = 4002
- }
- common := []interface{}{
- f.Base.SpiderCode,
- f.Base.SpiderName,
- f.Base.SpiderChannel,
- f.Base.SpiderDownDetailPage,
- f.Base.SpiderStartPage,
- f.Base.SpiderMaxPage,
- f.Base.SpiderRunRate,
- f.Base.Spider2Collection,
- f.Base.SpiderPageEncoding,
- f.Base.SpiderStoreMode,
- f.Base.SpiderStoreToMsgEvent,
- f.Base.SpiderTargetChannelUrl,
- f.Base.SpiderLastDownloadTime,
- f.Base.SpiderIsHistoricalMend,
- f.Base.SpiderIsMustDownload,
- }
- ptime := []interface{}{
- f.Step1.DateFormat,
- f.Step1.Address,
- f.Step1.ContentChooser,
- }
- list := []interface{}{
- f.Step2.Listadd,
- f.Step2.Listadds,
- f.Step2.BlockChooser,
- f.Step2.AddressChooser,
- f.Step2.TitleChooser,
- f.Step2.DateChooser,
- f.Step2.DateFormat,
- }
- content := []interface{}{
- f.Step3.ContentChooser,
- f.Step3.ElementChooser,
- }
- param := map[string]interface{}{}
- common[4] = 1
- param["param_common"] = common
- param["channel"] = f.Base.SpiderChannel
- param["href"] = f.Base.SpiderTargetChannelUrl
- //向导模式
- param["param_time"] = ptime
- param["param_list"] = list
- param["param_content"] = content
- param["type_time"] = f.Step1.Types
- param["type_list"] = f.Step2.Types
- param["type_content"] = f.Step3.Types
- //专家模式
- param["str_time"] = f.Step1.Expert
- param["str_list"] = f.Step2.Expert
- param["str_content"] = f.Step3.Expert
- param["comeintime"] = comeintime
- listcheck = strings.Replace(listcheck, "\n", "\\\\n", -1)
- param["listcheck"] = strings.Replace(listcheck, "\"", "\\\\\"", -1)
- contentcheck = strings.Replace(contentcheck, "\n", "\\\\n", -1)
- param["contentcheck"] = strings.Replace(contentcheck, "\"", "\\\\\"", -1)
- //补充模型
- s_model := f.GetString("model")
- configModel := util.Config.Model[s_model]
- model := map[string]interface{}{}
- for k, _ := range configModel {
- model[k] = f.GetString(k)
- }
- model["model"] = s_model
- param["code"] = f.Base.SpiderCode
- param["model"] = model
- if len((*one)) > 0 {
- param["createuser"] = (*one)["createuser"]
- param["createuserid"] = (*one)["createuserid"]
- param["code"] = (*one)["code"]
- //开发员关联任务修改爬虫状态
- state = qu.IntAll((*one)["state"])
- if auth == u.Role_Dev && state >= Sp_state_3 && restate != 1 { //开发员修改,已经审核通过(不包含已上架),状态重置为待完成(restate!=1判断,重采修改保存爬虫时不修改爬虫状态)
- param["state"] = 0
- } else {
- param["state"] = state
- }
- } else {
- param["createuser"] = f.GetSession("loginuser")
- param["createuserid"] = f.GetSession("userid")
- param["createuseremail"] = f.GetSession("email")
- param["next"] = f.GetSession("email")
- param["state"] = 0
- }
- if qu.ObjToString((*one)["modifyuser"]) == "" {
- param["modifyuser"] = param["createuser"]
- param["modifyuserid"] = param["createuserid"]
- }
- param["modifytime"] = time.Now().Unix()
- param["Ttitle"] = f.Step3.T_title
- param["Thref"] = f.Step3.T_href
- param["Tdate"] = f.Step3.T_date
- //其他信息
- param["isflow"] = f.OtherBase.IsFlow
- param["spidertype"] = f.OtherBase.SpiderType
- param["spiderhistorymaxpage"] = f.OtherBase.SpiderHistoryMaxPage
- qu.Debug(f.OtherBase.SpiderMoveEvent)
- tmpEvent, err := strconv.Atoi(f.OtherBase.SpiderMoveEvent) //f.OtherBase.SpiderMoveEvent此处SpiderMoveEvent已不表示comm、bid,表示增量的节点
- if f.OtherBase.SpiderType == "history" { //爬虫类型是history的放到7000节点,并记录历史节点
- param["event"] = 7000
- if err == nil {
- param["incrementevent"] = tmpEvent //开发人员切换增量节点
- } else if event := qu.IntAll((*one)["event"]); event != 7000 { //默认增量节点
- param["incrementevent"] = event
- }
- param["urgency"] = 1 //保存到7000时,爬虫紧急度变为紧急(控制7000节点爬虫紧急未写完不能写普通)
- } else if f.OtherBase.SpiderType == "increment" && err == nil { //增量
- param["event"] = tmpEvent //开发人员切换增量节点
- }
- if movevent, ok := util.Config.Uploadevents[f.OtherBase.SpiderMoveEvent].(string); ok && movevent != "" {
- param["spidermovevent"] = movevent
- }
- //开发人员修改爬虫节点后,在审核人员上架时,要在原来的节点下架,临时记录要下架的节点downevent
- if event := qu.IntAll((*one)["event"]); event != tmpEvent && event != 7000 {
- param["downevent"] = event
- }
- //三级页复制
- param["str_recontent"] = f.StepRe3.Expert
- param["iscopycontent"] = f.StepRe3.Checked
- //
- param["listisfilter"] = ListFilterReg.MatchString(f.Step2.Expert) //列表页校验是否含“--关键词过滤”
- matchLua, msg := LuaTextCheck(f.Step2.Expert, f.Step3.Expert, f.Step2.Types, infoformat, model)
- if !matchLua {
- issave := spider.SaveSpider(code, param) //保存脚本
- if issave {
- for k, v := range *one {
- if k != "_id" && param[k] == nil {
- param[k] = v
- }
- }
- Wlog(f.Base.SpiderName, f.Base.SpiderCode, user, f.GetSession("userid").(string), "修改", param)
- rep["msg"] = "保存成功"
- } else {
- rep["msg"] = "保存失败"
- }
- } else {
- rep["msg"] = "保存失败," + msg
- }
- rep["code"] = util.Se.Encode2Hex(code)
- f.ServeJson(rep)
- }
- }
- }
- func (f *Front) SaveChannels() {
- code := f.GetString("code")
- channels := f.GetString("channels")
- channels = strings.ReplaceAll(channels, ",", ",")
- arr := strings.Split(channels, ",")
- ok := u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{
- "$addToSet": map[string]interface{}{
- "channels": map[string]interface{}{"$each": arr},
- },
- }, false, false)
- f.ServeJson(map[string]interface{}{"ok": ok})
- }
- func (f *Front) SaveJs() {
- //param_type := f.GetString("param_type")
- //return_type := f.GetString("return_type")
- js_name := f.GetString("js_name")
- js_alias := f.GetString("js_alias")
- jstext := f.GetString("jstext")
- step := f.GetString("step")
- code := f.GetString("code")
- update := map[string]interface{}{
- "runjs": true,
- }
- js := map[string]interface{}{
- "js_text": jstext,
- "js_name": js_name,
- "js_step": step,
- "js_alias": js_alias,
- "js_param": "string",
- "js_return": "string",
- }
- if step == "list" {
- update["js_list"] = js
- } else if step == "detail" {
- update["js_detail"] = js
- }
- u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": update}, false, false)
- f.ServeJson(map[string]interface{}{"ok": true})
- }
- func LuaSaveLog(code, user string, data *map[string]interface{}, stype int) {
- saveOne, _ := u.MgoEB.FindOne("luasavelog", map[string]interface{}{"state": 0, "code": code})
- if stype == 0 { //保存记录
- if len(*saveOne) == 0 && len(*data) > 0 { //重新记录
- delete(*data, "_id")
- save := map[string]interface{}{
- "code": code,
- "state": 0,
- "saveuser": user,
- "comeintime": time.Now().Unix(),
- "luaold": data,
- }
- u.MgoEB.Save("luasavelog", save)
- }
- } else if stype == 1 { //对比
- if len(*saveOne) > 0 {
- tmp := (*saveOne)["luaold"].(map[string]interface{})
- updateMap := map[string]interface{}{} //记录字段改变值
- for k, v := range *data {
- if k != "_id" && k != "state" && k != "modifytime" {
- if tmpV := tmp[k]; tmpV != nil { //历史记录存在字段
- tmpJson, _ := json.Marshal(tmpV)
- dataJson, _ := json.Marshal(v)
- if string(tmpJson) != string(dataJson) {
- updateMap[k] = v
- }
- delete(tmp, k) //删除对比过的字段
- } else { //历史记录不存在字段
- updateMap[k] = v
- }
- }
- }
- if len(tmp) > 0 {
- for k, _ := range tmp { //上架时爬虫较历史爬虫少的字段信息
- updateMap[k] = nil
- }
- }
- set := map[string]interface{}{"state": 1, "updatetime": time.Now().Unix(), "updateuser": user}
- if len(updateMap) > 0 { //有字段改变
- set["luaupdate"] = updateMap
- set["lusnew"] = data
- }
- u.MgoEB.UpdateById("luasavelog", (*saveOne)["_id"], map[string]interface{}{"$set": set})
- }
- }
- }
- //爬虫保存时,检查列表页和三级页代码中是否含lua原生方法
- func LuaTextCheck(list, detail string, type_list, infoformat int, model map[string]interface{}) (b bool, msg string) {
- defer qu.Catch()
- if LuaReg.MatchString(list) || LuaReg.MatchString(detail) {
- msg = "代码中含有lua原生方法;"
- }
- if ListFilterReg.MatchString(detail) && !strings.Contains(detail, "delete") { //三级页含过滤但是没有data["delete"]="true"
- msg = `三级页缺少data["delete"]="true"`
- }
- sln_reg := regexp.MustCompile(`sendListNum\(pageno,list\)`)
- slnIndexArr := sln_reg.FindAllStringIndex(list, -1)
- if type_list != 0 && len(slnIndexArr) == 0 { //列表页专家模式且不含sendListNum
- msg = "代码中缺少sendListNum(pageno,list)方法;" + msg
- } else if type_list == 1 && len(slnIndexArr) > 0 { //判断sendListNum方法的位置
- trim_reg := regexp.MustCompile("trim")
- insert_reg := regexp.MustCompile("insert")
- trIndexArr := trim_reg.FindAllStringIndex(list, -1)
- irIndexArr := insert_reg.FindAllStringIndex(list, -1)
- slIndex := slnIndexArr[len(slnIndexArr)-1] //sendListNum位置
- trIndex := trIndexArr[len(trIndexArr)-1] //com.trim位置
- irIndex := irIndexArr[len(irIndexArr)-1] //insert位置
- qu.Debug("sendListNum位置:", trIndex, slIndex, irIndex)
- if slIndex[1] < trIndex[0] || slIndex[0] > irIndex[1] { //sendListNum方法必须在com.trim方法后,table.insert方法前
- msg = "sendListNum方法位置错误;" + msg
- }
- }
- if type_list == 1 {
- area := qu.ObjToString(model["area"])
- city := qu.ObjToString(model["city"])
- district := qu.ObjToString(model["district"])
- if area != "" && !strings.Contains(list, area) {
- msg += "省份信息与模板不一致;"
- }
- if city != "" && !strings.Contains(list, city) {
- msg += "城市信息与模板不一致;"
- }
- if district != "" && !strings.Contains(list, district) {
- msg += "区/县信息与模板不一致;"
- }
- if infoformat == 2 && !strings.Contains(detail, "projectname") {
- msg += "拟建/审批数据缺少projectname字段;"
- }
- }
- b = msg != ""
- return
- }
- //方法测试
- func (f *Front) RunStep() {
- imodal, _ := f.GetInteger("imodal")
- script, _ := f.GetBool("script")
- listcheck := f.GetString("listcheck")
- contentcheck := f.GetString("contentcheck")
- downloadnode := f.GetString("downloadnode") //下载节点
- common := []interface{}{
- f.Base.SpiderCode,
- f.Base.SpiderName,
- f.Base.SpiderChannel,
- f.Base.SpiderDownDetailPage,
- f.Base.SpiderStartPage,
- f.Base.SpiderMaxPage,
- f.Base.SpiderRunRate,
- f.Base.Spider2Collection,
- f.Base.SpiderPageEncoding,
- f.Base.SpiderStoreMode,
- f.Base.SpiderStoreToMsgEvent,
- f.Base.SpiderTargetChannelUrl,
- f.Base.SpiderLastDownloadTime,
- f.Base.SpiderIsHistoricalMend,
- f.Base.SpiderIsMustDownload,
- "",
- "",
- "",
- }
- if f.Method() == "POST" {
- switch f.GetString("step") {
- case "step1": //publishtime
- ptime := []interface{}{
- f.Step1.DateFormat,
- f.Step1.Address,
- f.Step1.ContentChooser,
- }
- if script {
- _, scripts := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal, 1)
- f.ServeJson(scripts)
- return
- }
- rs, err := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal)
- if err == nil {
- f.ServeJson(rs)
- }
- case "step2": //list
- addrs := strings.Split(f.Step2.Listadds, "\n")
- if len(addrs) > 0 {
- for k, v := range addrs {
- addrs[k] = "'" + v + "'"
- }
- f.Step2.Listadds = strings.Join(addrs, ",")
- } else if len(f.Step2.Listadds) > 5 {
- f.Step2.Listadds = "'" + f.Step2.Listadds + "'"
- } else {
- f.Step2.Listadds = ""
- }
- list := []interface{}{
- f.Step2.Listadd,
- f.Step2.Listadds,
- f.Step2.BlockChooser,
- f.Step2.AddressChooser,
- f.Step2.TitleChooser,
- f.Step2.DateChooser,
- f.Step2.DateFormat,
- }
- listcheck = strings.Replace(listcheck, "\n", "\\n", -1)
- listcheck = strings.Replace(listcheck, "\"", "\\\"", -1)
- s_model := f.GetString("model")
- configModel := util.Config.Model[s_model]
- model := map[string]interface{}{}
- for k, _ := range configModel {
- model[k] = f.GetString(k)
- }
- if script {
- _, script := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal, 1)
- f.ServeJson(script)
- return
- }
- rs, err := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal)
- if err == nil {
- f.ServeJson(rs)
- } else if err.(error).Error() == "no" {
- f.ServeJson(rs[0])
- }
- case "step3": //detail
- content := []interface{}{
- f.Step3.ContentChooser,
- f.Step3.ElementChooser,
- }
- contentcheck = strings.Replace(contentcheck, "\n", "\\n", -1)
- contentcheck = strings.Replace(contentcheck, "\"", "\\\"", -1)
- data := map[string]interface{}{}
- data["title"] = f.Step3.T_title
- data["href"] = f.Step3.T_href
- data["publishtime"] = f.Step3.T_date
- if script {
- _, script := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal, 1)
- f.ServeJson(script)
- return
- }
- rs, err := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal)
- if projectinfo, ok := rs["projectinfo"].(map[string]interface{}); ok && projectinfo != nil {
- if attachments, ok := projectinfo["attachments"].(map[string]interface{}); ok && attachments != nil {
- for _, tmp := range attachments {
- tmpMap := tmp.(map[string]interface{})
- if qu.ObjToString(tmpMap["filename"]) == "附件中含有乱码" {
- rs["msg"] = "附件中含有乱码"
- }
- }
- }
- }
- if err == nil {
- f.ServeJson(rs)
- } else {
- f.ServeJson(rs["no"])
- }
- }
- }
- }
- //爬虫测试数据json
- func (f *Front) GetJson() {
- code := f.GetString("code")
- username := f.GetSession("username").(string)
- if tr := TestResultMap[username+code]; tr != nil {
- task, _ := u.MgoEB.FindOne("task", map[string]interface{}{"code": code, "i_state": 3})
- comeintime := int64(0)
- if len(*task) > 0 {
- comeintime = qu.Int64All((*task)["l_comeintime"])
- tr.task_remark = "审核任务创建时间:" + qu.FormatDateByInt64(&comeintime, qu.Date_Short_Layout) + ";" + tr.task_remark
- }
- data := tr.dataInfo
- result := tr.listInfo
- if len(data) > 0 {
- data["contenthtml"] = ""
- }
- num := 0
- list_fir := []map[string]interface{}{}
- list_sec := []map[string]interface{}{}
- for page, list := range result {
- for k, v := range list {
- v["a_index"] = k + 1
- num++
- }
- if page == 1 {
- list_fir = list
- } else if page == 2 {
- list_sec = list
- }
- }
- f.T["list_fir"] = list_fir
- f.T["list_sec"] = list_sec
- f.T["data"] = data
- f.T["num"] = num
- f.T["descript"] = tr.task_descript
- f.T["remark"] = tr.task_remark
- f.T["rateremark"] = tr.task_rateremark
- f.T["reason"] = tr.reason
- f.T["msg"] = tr.msg
- f.T["comeintime"] = comeintime
- delete(TestResultMap, username+code)
- }
- f.Render("jsonInfo.html", &f.T)
- }
- var TestResultMap = map[string]*TestResult{} //username+code
// TestResult is the outcome of a whole-spider test run, kept for display on
// the JSON info page.
type TestResult struct {
	task_remark                 string   // joined remarks of the review task
	task_rateremark             []string // rate remarks of the review task
	task_descript, reason, msg  string   // task description, stored reason, joined check messages
	listInfo                    map[int64][]map[string]interface{} // list rows, keyed by page number
	dataInfo                    map[string]interface{}             // downloaded detail-page data
}
- //整体测试
- func (f *Front) SpiderPass() {
- defer mu.Catch()
- tr := &TestResult{}
- result := map[int64][]map[string]interface{}{}
- data := map[string]interface{}{}
- msgArr := []string{}
- code := f.GetString("code")
- downloadnode := f.GetString("node")
- //根据code查询待确认任务
- query := map[string]interface{}{
- "s_code": code,
- "i_state": 3,
- }
- task, _ := u.MgoEB.FindOne("task", query)
- descript := "null"
- remark := "null"
- remarktmp := []string{}
- rateremarktmp := []string{}
- if len(*task) > 0 {
- descript = (*task)["s_descript"].(string)
- if mrecord, ok := (*task)["a_mrecord"].([]interface{}); ok {
- for _, m := range mrecord {
- remarkInfo := m.(map[string]interface{})
- if remark := qu.ObjToString(remarkInfo["s_mrecord_remark"]); remark != "" {
- remarktmp = append(remarktmp, remark+";")
- }
- if rateremark := qu.ObjToString(remarkInfo["s_mrecord_rateremark"]); rateremark != "" {
- rateremarktmp = append(rateremarktmp, rateremark+";")
- }
- }
- }
- }
- if len(remarktmp) > 0 {
- remark = ""
- remark = strings.Join(remarktmp, "")
- }
- tr.task_remark = remark
- tr.task_rateremark = rateremarktmp
- tr.task_descript = descript
- //基本信息、方法一(发布时间)、方法二(列表页)、方法三(详情页)、总请求次数、go方法一、go方法二、go方法三、列表页条数
- steps := []interface{}{false, false, false, false, 0, 0, 0, 0, 0}
- one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- reason, _ := (*one)["reason"].(string)
- tr.reason = reason
- if len(*one) > 0 && (*one)["oldlua"] == nil {
- common := (*one)["param_common"].([]interface{})
- if len(common) < 13 {
- f.ServeJson(steps)
- return
- } else {
- steps[0] = true
- }
- } else {
- steps[0] = true
- }
- script, liststr, contentstr := "", "", ""
- if (*one)["oldlua"] == nil {
- script, liststr, contentstr = spider.GetScript(code)
- } else {
- script = (*one)["luacontent"].(string)
- }
- if liststr != "" && contentstr != "" {
- msgArr = u.SpiderPassCheckLua(liststr, contentstr, (*one)) //校验
- }
- s := spider.CreateSpider(downloadnode, script)
- s.SpiderMaxPage = 2 //采集列表页总页数
- s.Timeout = 60
- timestr, timeerr := s.GetLastPublishTime()
- if timeerr == nil && len(timestr) > 4 {
- steps[1] = true //发布时间获取成功
- downloadNum := 0
- result, downloadNum, _ = s.DownListPageItem() //列表页采集结果
- if downloadNum == 0 {
- f.ServeJson(steps)
- return
- }
- steps[2] = true //列表页获取成功
- steps[8] = downloadNum //下载量
- tr.listInfo = result
- if s.DownDetail {
- onePageList := result[1] //第一页数据
- if onePageDataNum := len(onePageList); onePageDataNum > 0 {
- index := onePageDataNum / 2 //取一条数据下载三级页
- param := map[string]string{}
- for k, v := range onePageList[index] {
- param[k] = qu.ObjToString(v)
- }
- data = map[string]interface{}{}
- s.DownloadDetailPage(param, data)
- tr.dataInfo = data
- if len(data) == 0 || qu.ObjToString(data["detail"]) == "" {
- steps[3] = false //详情页获取失败
- } else {
- steps[3] = true //详情页获取成功
- }
- }
- } else {
- steps[3] = true //详情页获取成功
- }
- //list, _ = s.DownListPageItem()
- //for _, l := range list {
- // if publishtime := qu.ObjToString(l["publishtime"]); publishtime == "0" || publishtime == "" {
- // msgArr = append(msgArr, "列表页publishtime取值异常")
- // break
- // } else {
- // t, err := time.ParseInLocation(qu.Date_Full_Layout, publishtime, time.Local)
- // if err != nil || t.Unix() <= 0 {
- // msgArr = append(msgArr, "列表页publishtime取值异常")
- // break
- // }
- // }
- //}
- //if len(list) > 0 {
- // tr.listInfo = list
- // listone := list[0]
- // if len(qu.ObjToString(listone["href"])) < 7 ||
- // (qu.ObjToString(listone["publishtime"]) != "0" && len(qu.ObjToString(listone["publishtime"])) < 5) ||
- // len(qu.ObjToString(listone["title"])) < 3 {
- // f.ServeJson(steps)
- // return
- // } else {
- // steps[2] = true
- // if s.DownDetail {
- // param := map[string]string{}
- // index := 0
- // if len(list) > 0 {
- // steps[8] = len(list)
- // index = len(list) / 2
- // for k, v := range list[index] {
- // param[k] = qu.ObjToString(v)
- // }
- // data = map[string]interface{}{}
- // s.DownloadDetailPage(param, data)
- // if len(data) > 0 {
- // tr.dataInfo = data
- // }
- // if len(data) == 0 || data["detail"].(string) == "" {
- // steps[3] = false
- // } else {
- // steps[3] = true
- // }
- // }
- // } else {
- // steps[3] = true
- // }
- // }
- //}
- }
- //关闭laustate
- s.L.Close()
- steps[4] = s.Test_luareqcount
- steps[5] = s.Test_goreqtime
- steps[6] = s.Test_goreqlist
- steps[7] = s.Test_goreqcon
- //校验
- msg := u.SpiderPassCheckListAndDetail(result, data)
- msgArr = append(msgArr, msg...)
- username := f.GetSession("username").(string)
- tr.msg = strings.Join(msgArr, ";")
- TestResultMap[username+code] = tr
- f.ServeJson(steps)
- }
- func (f *Front) DownSpider(code string) {
- auth := qu.IntAll(f.GetSession("auth"))
- user := f.GetSession("loginuser")
- success := false
- script := ""
- if auth > u.Role_Dev {
- success = true
- //one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
- one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- createuserid := qu.ObjToString((*one)["createuserid"])
- filename := code + ".lua"
- if len(*one) > 0 {
- if (*one)["oldlua"] != nil {
- if (*one)["luacontent"] != nil {
- script = (*one)["luacontent"].(string)
- }
- } else {
- user, _ := u.MgoEB.FindById("user", createuserid, nil)
- name := (*one)["createuser"]
- email := (*user)["s_email"]
- upload := time.Now().Format("2006-01-02 15:04:05")
- script, _, _ = spider.GetScript(code, name, email, upload)
- }
- }
- f.ResponseWriter.Header().Del("Content-Type")
- f.ResponseWriter.Header().Add("Content-Type", "application/x-download")
- f.ResponseWriter.Header().Add("Content-Disposition", "attachment;filename=spider_"+filename)
- f.WriteBytes([]byte(script))
- } else {
- f.Write("您没有权限")
- }
- //记录日志
- downlogs := map[string]interface{}{
- "code": code,
- "user": user,
- "auth": auth,
- "time": time.Now().Unix(),
- "success": success,
- "script": script,
- }
- u.MgoEB.Save("luadownlogs", downlogs)
- }
- //下架删除心跳
- func DelSpiderHeart(code string) bool {
- return u.MgoS.Update("spider_heart", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"del": true}}, false, true)
- }
- //下架删除download数据
- //func delDownloadData(code string) bool {
- // return mgu.Del("download", "spider", "spider", `{"code":"`+code+`"}`)
- //}
- //批量作废删除download数据
- //func disableDelDownloadData(code []string) {
- // for _, v := range code {
- // flag := delDownloadData(v)
- // log.Println(code, "---批量删除download数据:", flag)
- // }
- //}
- //爬虫核对
- func (f *Front) Checktime() {
- code := f.GetString("code")
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != u.Role_Admin {
- f.ServeJson(false)
- } else {
- //b := u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{
- // "l_checktime": time.Now().Unix(),
- //}}, true, false)
- b := u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{
- "l_checktime": time.Now().Unix(),
- }}, true, false)
- f.ServeJson(b)
- }
- }
- //批量作废
- func (f *Front) Disables() error {
- auth := qu.IntAll(f.GetSession("auth"))
- names := strings.Split(f.GetString("names"), ",")
- ids := strings.Split(f.GetString("ids"), ",")
- codes := strings.Split(f.GetString("codes"), ",")
- disablereason := f.GetString("disablereason")
- res := ""
- if IsHasUpState(auth, Sp_state_4) {
- for k, id := range ids {
- b, err := UpStateAndUpSpider("", id, disablereason, "", Sp_state_4)
- if b { //作废成功
- //修改任务状态
- UpTaskState(codes, 4, "", int64(0))
- //删除download表数据
- //go disableDelDownloadData(codes)
- if err != nil {
- res = res + names[k] + ",ok" + qu.ObjToString(err.Error()) + ";"
- } else {
- res = res + names[k] + ",ok" + ";"
- }
- } else {
- res = res + names[k] + "," + qu.ObjToString(err.Error()) + ";"
- }
- }
- } else {
- res = "没有权限"
- }
- f.ServeJson(res)
- return nil
- }
- //批量上下架
- func (f *Front) BatchShelves() {
- codes := strings.Split(f.GetString("codes"), ",")
- state, _ := f.GetInteger("state")
- auth := qu.IntAll(f.GetSession("auth"))
- errCode := []string{}
- var err error
- b := false
- if IsHasUpState(auth, Sp_state_5) {
- if state == 5 { //批量上架
- for _, code := range codes {
- _, err = UpStateAndUpSpider(code, "", "", "", Sp_state_5)
- if err != nil {
- errCode = append(errCode, code)
- }
- }
- } else { //批量下架
- for _, code := range codes {
- b, err = UpStateAndUpSpider(code, "", "", "", Sp_state_6)
- if !b || err != nil {
- errCode = append(errCode, code)
- }
- //下架删除download数据
- //if b {
- // flag := delDownloadData(code)
- // log.Println(code, "---删除download数据:", flag)
- //}
- }
- }
- } else {
- errCode = append(errCode, "没有权限")
- }
- f.ServeJson(errCode)
- }
- //更新爬虫状态
- func (f *Front) UpState() error {
- username := f.GetSession("username").(string)
- code := f.GetString("code")
- state, _ := f.GetInt("state")
- id := f.GetString("taskId")
- reason := f.GetString("reason")
- auth := qu.IntAll(f.GetSession("auth"))
- var codeArr = []string{code}
- var taskid []string
- //修改任务状态
- istotask := false
- res := map[string]interface{}{
- "istotask": istotask,
- "err": "没有权限",
- "code": util.Se.Encode2Hex(code),
- "taskid": taskid,
- }
- var xgTime int64
- if f.GetSession(id) == nil || f.GetSession(id) == "" {
- xgTime = time.Now().Unix()
- } else {
- xgTimeStr := qu.ObjToString(f.GetSession(id))
- xgTimeTmp, _ := time.ParseInLocation("2006-01-02 15:04:05", xgTimeStr, time.Local)
- xgTime = xgTimeTmp.Unix()
- }
- f.DelSession(id)
- if IsHasUpState(auth, int(state)) {
- b, err := UpStateAndUpSpider(code, "", reason, username, int(state)) //更新爬虫状态
- if b && state == Sp_state_1 { //提交审核
- //有对应任务跳转提交记录页
- taskid = checkTask(codeArr, 1)
- if len(taskid) > 0 {
- res["istotask"] = true
- res["taskid"] = taskid[0]
- }
- } else if b && state == Sp_state_2 { //打回
- taskid = checkTask(codeArr, 2)
- if len(taskid) > 0 {
- //UpTaskState([]string{taskid}, 2) //修改状态
- UpTaskState(taskid, 2, "", int64(0)) //修改任务状态
- SaveRemark(taskid, reason, username) //保存记录信息
- }
- } else if b && state == Sp_state_3 { //审核通过
- taskid = checkTask(codeArr, 3)
- if len(taskid) > 0 {
- //UpTaskState([]string{taskid}, 3)
- UpTaskState(taskid, 3, "", int64(0))
- SaveRemark(taskid, "", username)
- }
- } else if b && state == Sp_state_6 { //下架
- //下架成功删除心跳数据
- flag := DelSpiderHeart(code)
- log.Println(code, "---下架删除download数据:", flag)
- } else if b && state == Sp_state_7 { //反馈
- taskid = checkTask(codeArr, 7)
- if len(taskid) > 0 {
- UpTaskState(taskid, 7, reason, xgTime)
- }
- }
- if err != nil {
- res["err"] = err.Error()
- f.ServeJson(res)
- } else {
- res["err"] = ""
- f.ServeJson(res)
- }
- } else {
- f.ServeJson(res)
- }
- return nil
- }
- func (f *Front) Assort() {
- state, _ := f.GetInteger("state")
- code := f.GetString("code")
- codes := u.SymbolReg.Split(code, -1)
- success := true
- msg := ""
- for _, code := range codes {
- query := map[string]interface{}{
- "code": code,
- }
- //下架爬虫
- //lua, _ := u.MgoE.FindOne("luaconfig", query)
- lua, _ := u.MgoEB.FindOne("luaconfig", query)
- upresult, err := spider.UpdateSpiderByCodeState(code, "6", qu.IntAll((*lua)["event"]))
- qu.Debug("下架爬虫:", code, upresult, err)
- if upresult && err == nil {
- //更新爬虫状态
- update := map[string]interface{}{
- "$set": map[string]interface{}{
- "state": state,
- //"modifytime": time.Now().Unix(),
- "l_uploadtime": time.Now().Unix(),
- },
- }
- //u.MgoE.Update("luaconfig", query, update, false, false)
- u.MgoEB.Update("luaconfig", query, update, false, false)
- //关闭任务
- query = map[string]interface{}{
- "s_code": code,
- }
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 6,
- },
- }
- u.MgoEB.Update("task", query, update, false, true)
- //删除心跳
- DelSpiderHeart(code)
- } else {
- success = false
- msg += code + ";"
- }
- }
- f.ServeJson(map[string]interface{}{"success": success, "msg": msg})
- }
- //更新爬虫状态,并判断是否更新节点爬虫
- func UpStateAndUpSpider(code, id, reason, username string, state int) (bool, error) {
- upresult := false
- var err error
- one := &map[string]interface{}{}
- if code != "" {
- //one, _ = u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
- one, _ = u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- } else {
- //one, _ = u.MgoE.FindById("luaconfig", id, nil)
- one, _ = u.MgoEB.FindById("luaconfig", id, nil)
- code = qu.ObjToString((*one)["code"])
- }
- if len(*one) > 0 {
- var event int
- unset := map[string]interface{}{}
- if (*one)["event"] != nil {
- event = qu.IntAll((*one)["event"])
- } else {
- for k, _ := range util.Config.Uploadevents { //?
- event = qu.IntAll(k)
- break
- }
- //r := rand.New(rand.NewSource(time.Now().UnixNano()))
- //event = util.Config.Uploadevents[r.Intn(len(util.Config.Uploadevents))]
- }
- //oldstate := qu.IntAll(one["state"])
- switch state {
- case Sp_state_4, Sp_state_6: //作废、下架
- // if oldstate == Sp_state_5 {
- // upresult = false
- // err = errors.New("已上架不允许作废")
- // } else {
- // upresult = true
- // }
- upresult, err = spider.UpdateSpiderByCodeState(code, fmt.Sprint(state), event) //下架
- qu.Debug("下架:", upresult, code)
- case Sp_state_5: //上架(爬虫端在更新上架的时候为了更新内存中字段,采用先下架上架)
- if downevent := qu.IntAll((*one)["downevent"]); downevent != 0 { //爬虫开发修改爬虫节点,审核人员上架爬虫时,原来爬虫所在节点下架
- upresult, err = spider.UpdateSpiderByCodeState(code, "6", downevent)
- if upresult && err == nil {
- unset = map[string]interface{}{"downevent": ""}
- }
- }
- upresult, err = spider.UpdateSpiderByCodeState(code, "6", event)
- qu.Debug("下架:", upresult, code)
- if upresult && err == nil {
- upresult, err = spider.UpdateSpiderByCodeState(code, fmt.Sprint(state), event)
- qu.Debug("上架:", upresult, code)
- }
- default:
- upresult = true
- err = nil
- }
- if err != nil && strings.Contains(err.Error(), "timeout") {
- err = errors.New("连接节点" + fmt.Sprint(event) + "超时")
- upresult = true
- }
- if upresult && err == nil {
- upset := map[string]interface{}{"state": state} //修改状态
- if (*one)["oldlua"] != nil { //老脚本上传
- //upresult = u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, true, false)
- up := map[string]interface{}{
- "$set": upset,
- }
- if len(unset) > 0 {
- up["$unset"] = unset
- }
- upresult = u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, up, true, false)
- } else {
- if state == Sp_state_1 { //提交审核
- upset["l_complete"] = time.Now().Unix()
- upset["report"] = ""
- } else if state == Sp_state_7 { //反馈问题
- upset["report"] = reason
- upset["state"] = 1 //反馈后爬虫改为待审核
- upset["l_complete"] = time.Now().Unix()
- } else if state == Sp_state_3 { //审核通过
- if (*one)["event"] == nil {
- upset["event"] = event
- //upset["modifytime"] = time.Now().Unix()
- }
- upset["frequencyerrtimes"] = 0 //爬虫审核通过,重置采集频率异常次数
- upset["l_uploadtime"] = time.Now().Unix()
- } else if state == Sp_state_2 { //打回原因
- upset["reason"] = reason
- } else if state == Sp_state_5 { //上架,核对时间重置
- upset["l_checktime"] = 0
- LuaSaveLog(code, username, one, 1)
- } else if state == Sp_state_4 { //作废,作废原因
- upset["disablereason"] = reason
- //upset["modifytime"] = time.Now().Unix()
- upset["l_uploadtime"] = time.Now().Unix() //l_complete爬虫完成时间
- }
- up := map[string]interface{}{
- "$set": upset,
- }
- if len(unset) > 0 {
- up["$unset"] = unset
- }
- //upresult = u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, false, false)
- upresult = u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, up, false, false)
- qu.Debug("提交日志:", code, upset, upresult)
- if upresult && (state == Sp_state_2 || state == Sp_state_3) { //打回、审核记录日志
- types := "打回"
- if state == Sp_state_3 {
- types = "审核"
- }
- event := qu.IntAll((*one)["event"])
- obj := map[string]interface{}{
- "code": code,
- "auditor": username,
- "types": types,
- "comeintime": time.Now().Unix(),
- "reason": reason,
- "spideruser": (*one)["createuser"],
- "modifytime": (*one)["modifytime"],
- "event": event,
- "site": (*one)["site"],
- "channel": (*one)["channel"],
- }
- if !strings.HasSuffix(code, u.Bu) { //凡是以_bu结尾的爬虫一律不计入审核记录
- //新爬虫审核记录表
- if event == 7000 && (state == Sp_state_3 || state == Sp_state_2) {
- count := u.MgoEB.Count("lua_logs_auditor", map[string]interface{}{"code": code, "types": "审核"})
- if count == 0 { //新爬虫审核记录
- u.MgoEB.Save("lua_logs_auditor_new", obj)
- }
- }
- u.MgoEB.Save("lua_logs_auditor", obj) //历史维护爬虫审核记录
- }
- }
- }
- }
- }
- return upresult, err
- }
- //保存记录信息
- func SaveRemark(taskid []string, reason, username string) {
- timeNow := time.Now().Unix()
- if reason == "" {
- reason = "审核通过"
- }
- for _, id := range taskid {
- task, _ := u.MgoEB.FindById("task", id, nil)
- if task != nil && len(*task) > 0 {
- checkData := (*task)["a_check"]
- var checkArr []map[string]interface{}
- newData := make(map[string]interface{})
- newData["s_check_checkUser"] = username
- newData["l_check_checkTime"] = timeNow
- newData["s_check_checkRemark"] = reason
- if checkData != nil {
- myArr := qu.ObjArrToMapArr(checkData.([]interface{}))
- if myArr != nil && len(myArr) > 0 {
- for _, v := range myArr {
- checkArr = append(checkArr, v)
- }
- }
- }
- checkArr = append(checkArr, newData)
- (*task)["a_check"] = checkArr
- u.MgoEB.UpdateById("task", id, map[string]interface{}{"$set": &task})
- }
- }
- }
- //修改任务状态
- func UpTaskState(code []string, num int, reason string, startTime int64) {
- query := map[string]interface{}{}
- update := map[string]interface{}{}
- for _, v := range code {
- if num == 1 || num == 2 || num == 3 || num == 7 { //id
- query = map[string]interface{}{
- "_id": mongodb.StringTOBsonId(v),
- }
- } else {
- query = map[string]interface{}{ //code
- "s_code": v,
- }
- }
- if num == 1 { //提交审核
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 3,
- },
- }
- } else if num == 2 { //打回 -->未通过
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 5,
- "l_updatetime": time.Now().Unix(),
- },
- }
- } else if num == 3 { //发布(审核通过) -->审核通过
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 4,
- "l_updatetime": time.Now().Unix(),
- "l_uploadtime": time.Now().Unix(),
- },
- }
- } else if num == 4 { //批量作废 -->关闭
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 6,
- "l_complete": time.Now().Unix(),
- "l_updatetime": time.Now().Unix(),
- },
- }
- } else if num == 7 { //反馈信息 -->待审核
- newData := map[string]interface{}{
- "l_mrecord_comeintime": startTime,
- "l_mrecord_complete": time.Now().Unix(),
- "s_mrecord_remark": reason,
- }
- mrecord := []interface{}{}
- mrecord = append(mrecord, newData)
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 3,
- "l_complete": time.Now().Unix(),
- "a_mrecord": mrecord,
- "l_updatetime": time.Now().Unix(),
- },
- }
- }
- flag := u.MgoEB.Update("task", query, update, false, true)
- log.Println("codeOrId:", query, " 修改任务状态:", flag)
- }
- }
- //更新节点
- func (f *Front) ChangeEvent() {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != u.Role_Admin {
- f.ServeJson("没有权限")
- }
- code := f.GetString("code")
- event, _ := f.GetInt("event")
- eventok := false
- for k, _ := range util.Config.Uploadevents {
- if event == qu.Int64All(k) {
- eventok = true
- break
- }
- }
- if !eventok {
- f.ServeJson("没有对应节点")
- return
- }
- //info, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
- info, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- if len(*info) > 0 {
- oldevent := qu.IntAll((*info)["event"])
- if qu.IntAll((*info)["state"]) == Sp_state_5 {
- //源节点下架
- _, err := spider.UpdateSpiderByCodeState(code, fmt.Sprint(Sp_state_6), oldevent)
- set := map[string]interface{}{
- "$set": map[string]interface{}{
- "event": qu.IntAll(event),
- "state": Sp_state_6,
- },
- }
- //u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
- u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
- if err != nil && strings.Contains(err.Error(), "timeout") {
- f.ServeJson("连接节点" + fmt.Sprint(oldevent) + "超时")
- } else {
- f.ServeJson(err.Error())
- }
- } else {
- set := map[string]interface{}{
- "$set": map[string]interface{}{
- "event": qu.IntAll(event),
- },
- }
- //u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
- u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
- }
- } else {
- f.ServeJson("没有对应记录")
- }
- }
- //验证用户是否有更改状态权限
- func IsHasUpState(auth, state int) bool {
- rep := false
- switch auth {
- case u.Role_Dev:
- if state == Sp_state_1 || state == Sp_state_7 {
- rep = true
- }
- case u.Role_Examine:
- if state == Sp_state_2 || state == Sp_state_3 {
- rep = true
- }
- case u.Role_Admin:
- rep = true
- default:
- }
- return rep
- }
- var list_fields = `{"_id":1,"code":1,"createuser":1,"modifyuser":1,"modifytime":1,"l_uploadtime":1,"l_checktime":1,"state":1,"param_common":1,"event":1,"urgency":1,"platform":1,"pendstate":1}`
- //脚本管理,结合爬虫运行信息
- func (f *Front) LuaList() {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != u.Role_Admin {
- f.ServeJson("没有权限!")
- return
- }
- if f.Method() == "POST" {
- state, _ := f.GetInteger("state")
- event, _ := f.GetInteger("event")
- start, _ := f.GetInteger("start")
- limit, _ := f.GetInteger("length")
- draw, _ := f.GetInteger("draw")
- searchStr := f.GetString("search[value]")
- //search := strings.Replace(searchStr, " ", "", -1)
- search := strings.TrimSpace(searchStr)
- platform := f.GetString("platform")
- query := map[string]interface{}{}
- queryArr := []interface{}{}
- //搜索条件
- if search != "" {
- q1 := map[string]interface{}{}
- q1["$or"] = []interface{}{
- map[string]interface{}{"code": map[string]interface{}{"$regex": search}},
- map[string]interface{}{"createuser": map[string]interface{}{"$regex": search}},
- map[string]interface{}{"param_common.1": map[string]interface{}{"$regex": search}},
- }
- queryArr = append(queryArr, q1)
- }
- //爬虫状态
- q2 := map[string]interface{}{}
- if state > -1 {
- q2 = map[string]interface{}{"state": state}
- } else {
- q2 = map[string]interface{}{
- "state": map[string]interface{}{
- "$in": []int{Sp_state_3, Sp_state_5, Sp_state_6},
- },
- }
- }
- queryArr = append(queryArr, q2)
- //爬虫节点
- q3 := map[string]interface{}{}
- if event > -1 {
- q3 = map[string]interface{}{"event": event}
- queryArr = append(queryArr, q3)
- }
- //爬虫平台
- q4 := map[string]interface{}{}
- if platform != "-1" {
- q4 = map[string]interface{}{"platform": platform}
- queryArr = append(queryArr, q4)
- }
- query["$and"] = queryArr
- sort := `{"%s":%d}`
- orderIndex := f.GetString("order[0][column]")
- orderName := f.GetString(fmt.Sprintf("columns[%s][data]", orderIndex))
- orderType := 1
- if f.GetString("order[0][dir]") != "asc" {
- orderType = -1
- }
- sort = fmt.Sprintf(sort, orderName, orderType)
- page := start / 10
- //luas, _ := u.MgoE.Find("luaconfig", query, sort, list_fields, false, start, limit)
- //count := u.MgoE.Count("luaconfig", query)
- luas, _ := u.MgoEB.Find("luaconfig", query, sort, list_fields, false, start, limit)
- count := u.MgoEB.Count("luaconfig", query)
- qu.Debug("query:", query, start, limit, count, len(*luas))
- for k, v := range *luas {
- v["num"] = k + 1 + page*10
- l_uploadtime := qu.Int64All(v["l_uploadtime"])
- v["l_uploadtime"] = qu.FormatDateByInt64(&l_uploadtime, qu.Date_Full_Layout)
- l_checktime := qu.Int64All(v["l_checktime"])
- v["l_checktime"] = qu.FormatDateByInt64(&l_checktime, qu.Date_Full_Layout)
- if l_checktime > 0 { //核对
- v["is_check"] = true
- } else { //未核对
- v["is_check"] = false
- }
- if tmp, ok := spinfos.Load(v["code"]); ok {
- info := tmp.(*spinfo)
- v["modifytime"] = info.lastHeartbeat
- v["yesterday"] = fmt.Sprint(info.yesterdayDowncount) + "/" + fmt.Sprint(info.yestoDayRequestNum)
- v["terday"] = fmt.Sprint(info.todayDowncount) + "/" + fmt.Sprint(info.toDayRequestNum)
- v["lastdowncount"] = info.lastDowncount
- v["lstate"] = info.lstate
- } else {
- v["modifytime"] = ""
- v["yesterday"] = ""
- v["terday"] = ""
- v["lastdowncount"] = 0
- v["lstate"] = ""
- }
- }
- f.ServeJson(map[string]interface{}{"draw": draw, "data": luas, "recordsFiltered": count, "recordsTotal": count})
- } else {
- events := []string{}
- for k, _ := range util.Config.Uploadevents {
- events = append(events, k)
- }
- sort.Strings(events)
- f.T["events"] = events
- f.Render("lualist.html", &f.T)
- }
- }
- //心跳监控
- func (f *Front) Heart() {
- if f.Method() == "POST" {
- event, _ := f.GetInteger("event")
- start, _ := f.GetInteger("start")
- limit, _ := f.GetInteger("length")
- draw, _ := f.GetInteger("draw")
- searchStr := f.GetString("search[value]")
- search := strings.TrimSpace(searchStr) //只能搜索code
- //qu.Debug("search:", search, "start:", start, "limit:", limit, "draw:", draw)
- query := map[string]interface{}{
- "del": false,
- }
- if event > -1 {
- query["event"] = event
- }
- if search != "" {
- query["code"] = search
- }
- sort := `{"%s":%d}`
- orderIndex := f.GetString("order[0][column]")
- orderName := f.GetString(fmt.Sprintf("columns[%s][data]", orderIndex))
- orderType := 1
- if f.GetString("order[0][dir]") != "asc" {
- orderType = -1
- }
- sort = fmt.Sprintf(sort, orderName, orderType)
- qu.Debug("query:", query, "sort:", sort)
- list, _ := u.MgoS.Find("spider_heart", query, sort, nil, false, start, limit)
- count := u.MgoS.Count("spider_heart", query)
- for _, l := range *list {
- code := qu.ObjToString(l["code"])
- qu.Debug(code)
- //d, _ := u.MgoE.FindOneByField("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"state": 1, "param_common": 1, "str_list": 1, "type_list": 1})
- d, _ := u.MgoEB.FindOneByField("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"state": 1, "param_common": 1, "str_list": 1, "type_list": 1, "pendtime": 1})
- l["state"] = (*d)["state"]
- l["param_common"] = (*d)["param_common"]
- if lt := qu.Int64All(l["list"]); lt != 0 {
- l["list"] = qu.FormatDateByInt64(<, qu.Date_Full_Layout)
- } else {
- l["list"] = 0
- }
- if dt := qu.Int64All(l["detail"]); dt != 0 {
- l["detail"] = qu.FormatDateByInt64(&dt, qu.Date_Full_Layout)
- } else {
- l["detail"] = 0
- }
- if det := qu.Int64All(l["detailexecute"]); det != 0 {
- l["detailexecute"] = qu.FormatDateByInt64(&det, qu.Date_Full_Layout)
- } else {
- l["detailexecute"] = 0
- }
- if ft := qu.Int64All(l["findlist"]); ft != 0 {
- l["findlist"] = qu.FormatDateByInt64(&ft, qu.Date_Full_Layout)
- } else {
- l["findlist"] = 0
- }
- ut := qu.Int64All(l["updatetime"])
- l["updatetime"] = qu.FormatDateByInt64(&ut, qu.Date_Full_Layout)
- pendtime := qu.Int64All((*d)["pendtime"])
- if pendtime != 0 {
- l["pendtime"] = qu.FormatDateByInt64(&pendtime, qu.Date_Full_Layout)
- } else {
- l["pendtime"] = "0"
- }
- //l["isfindlist"] = "否"
- //typeList := qu.IntAll((*d)["type_list"])
- //strList := qu.ObjToString((*d)["str_list"])
- //if typeList == 1 && strings.Contains(strList, "findListHtml") {
- // l["isfindlist"] = "是"
- //}
- }
- f.ServeJson(map[string]interface{}{"draw": draw, "data": list, "recordsFiltered": count, "recordsTotal": count})
- } else {
- events := []string{}
- for k, _ := range util.Config.Uploadevents {
- events = append(events, k)
- }
- sort.Strings(events)
- f.T["events"] = events
- f.Render("heart.html", &f.T)
- }
- }
// spinfo holds the run-time figures reported for one spider. SpiderInfo
// fills every field from the JSON key of the same name.
type spinfo struct {
	code string // spider code, also the key in spinfos

	todayDowncount  int
	toDayRequestNum int

	yesterdayDowncount int
	yestoDayRequestNum int

	totalDowncount  int
	totalRequestNum int

	errorNum   int
	roundCount int
	runRate    int

	lastDowncount int
	lastHeartbeat string // formatted date of the last heartbeat
	lstate        string
}
- //爬虫信息
- func SpiderInfo(data string) {
- data = util.Se.DecodeString(data)
- infos := []map[string]interface{}{}
- err := json.Unmarshal([]byte(data), &infos)
- if err != nil {
- return
- }
- for _, tmp := range infos {
- lastHeartbeat := qu.Int64All(tmp["lastHeartbeat"])
- info := &spinfo{
- code: fmt.Sprint(tmp["code"]),
- todayDowncount: qu.IntAll(tmp["todayDowncount"]),
- toDayRequestNum: qu.IntAll(tmp["toDayRequestNum"]),
- yesterdayDowncount: qu.IntAll(tmp["yesterdayDowncount"]),
- yestoDayRequestNum: qu.IntAll(tmp["yestoDayRequestNum"]),
- totalDowncount: qu.IntAll(tmp["totalDowncount"]),
- totalRequestNum: qu.IntAll(tmp["totalRequestNum"]),
- errorNum: qu.IntAll(tmp["errorNum"]),
- roundCount: qu.IntAll(tmp["roundCount"]),
- runRate: qu.IntAll(tmp["runRate"]),
- lastHeartbeat: qu.FormatDateByInt64(&lastHeartbeat, qu.Date_Full_Layout),
- lastDowncount: qu.IntAll(tmp["lastDowncount"]),
- lstate: fmt.Sprint(tmp["lstate"]),
- }
- spinfos.Store(info.code, info)
- //log.Println(info)
- }
- }
- //接受维护任务信息
- func SpiderModifyTask(data string) {
- data = util.Se.DecodeString(data)
- mtasks := []map[string]interface{}{}
- err := json.Unmarshal([]byte(data), &mtasks)
- if err != nil {
- return
- }
- for k, tmp := range mtasks {
- log.Println(k, tmp)
- }
- }
- //查看是否有该任务
- func checkTask(codes []string, num int) []string {
- // var id string = ""
- query := map[string]interface{}{}
- var idArr []string
- if len(codes) > 0 {
- for _, v := range codes {
- if num == 1 {
- query = map[string]interface{}{
- "s_code": v,
- "i_state": map[string]interface{}{
- "$in": []int{1, 2, 5},
- },
- }
- } else if num == 2 { //打回时查询待审核的任务
- query = map[string]interface{}{
- "s_code": v,
- "i_state": 3,
- }
- } else if num == 3 { //审核通过时查询待处理、处理中、待审核、未通过的任务
- query = map[string]interface{}{
- "s_code": v,
- "i_state": map[string]interface{}{
- "$in": []int{1, 2, 3, 5},
- },
- }
- } else if num == 7 {
- query = map[string]interface{}{
- "s_code": v,
- "i_state": map[string]interface{}{
- "$in": []int{2, 5},
- },
- }
- }
- task, _ := u.MgoEB.Find("task", query, nil, nil, false, -1, -1)
- if task != nil {
- for _, t := range *task {
- idArr = append(idArr, mongodb.BsonIdToSId(t["_id"]))
- }
- }
- return idArr
- }
- }
- return idArr
- }
|