12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788 |
- package front
- import (
- "encoding/json"
- "errors"
- "fmt"
- "log"
- "mongodb"
- "regexp"
- "sort"
- "spider"
- "strconv"
- mu "mfw/util"
- qu "qfw/util"
- util "spiderutil"
- "strings"
- "time"
- u "util"
- )
// Base holds the general settings of a spider.
//
// SaveStep and RunStep pack every field except SpiderCodeOld, in declaration
// order, into the "param_common" slice, so the order of the fields below is
// significant and must not be changed.
type Base struct {
	SpiderCode, SpiderCodeOld, SpiderName, SpiderChannel string
	SpiderDownDetailPage                                 bool
	SpiderStartPage, SpiderMaxPage, SpiderRunRate        int
	Spider2Collection, SpiderPageEncoding                string
	SpiderStoreMode                                      int // 1,2
	SpiderStoreToMsgEvent                                int
	SpiderTargetChannelUrl, SpiderLastDownloadTime       string
	SpiderIsHistoricalMend, SpiderIsMustDownload         bool
}
// Step1 holds the settings of the publish-time step.
//
// SaveStep stores DateFormat, Address and ContentChooser as "param_time",
// Expert as "str_time" and Types as "type_time".
type Step1 struct {
	Address, ContentChooser, DateFormat string
	Expert                              string // hand-written script for this step
	Types                               int
}
// Step2 holds the settings of the list-page step.
//
// SaveStep stores the seven selector/address fields, in declaration order, as
// "param_list", Expert as "str_list" and Types as "type_list".
type Step2 struct {
	Listadd, Listadds                         string
	BlockChooser, AddressChooser              string
	TitleChooser, DateChooser, DateFormat     string
	Expert                                    string // hand-written script for this step
	Types                                     int
}
// Step3 holds the settings of the detail-page step.
//
// SaveStep stores ContentChooser and ElementChooser as "param_content",
// T_title/T_href/T_date as "Ttitle"/"Thref"/"Tdate", Expert as "str_content"
// and Types as "type_content". RunStep passes the three T_* values to the
// detail-page test as title, href and publishtime.
type Step3 struct {
	ContentChooser, ElementChooser string
	T_title, T_href, T_date        string
	Expert                         string // hand-written script for this step
	Types                          int
}
// StepRe3 holds the detail-page copy settings. SaveStep stores Checked as
// "iscopycontent" and Expert as "str_recontent".
type StepRe3 struct {
	Checked bool   // saved under "iscopycontent"
	Expert  string // saved under "str_recontent"
}
// OtherBase holds the additional spider attributes that SaveStep stores
// alongside the step parameters.
type OtherBase struct {
	// IsFlow flags whether the data collected by the spider takes part in
	// the data flow.
	IsFlow int
	// SpiderType is the spider type: "increment" or "history".
	SpiderType string
	// SpiderHistoryMaxPage is the maximum page to collect when collecting
	// historical data.
	SpiderHistoryMaxPage int
	// SpiderMoveEvent is the node the spider moves to once history has been
	// collected (comm: queue mode, bid: high-performance mode). SaveStep also
	// parses it as a number and treats it as the increment node.
	SpiderMoveEvent string
}
- //加载某个爬虫
- func (f *Front) LoadSpider(codeTaskIdReState string) error {
- tmpStr := strings.Split(codeTaskIdReState, "__")
- code := tmpStr[0]
- taskId := tmpStr[1]
- auth := qu.IntAll(f.GetSession("auth"))
- restate := -1
- if taskId == "restate=1" { //重采编辑
- restate = 1
- } else if taskId == "restate=2" {
- restate = 2
- } else if taskId == "restate=3" {
- restate = 3
- } else {
- if auth == role_dev && qu.ObjToString(f.GetSession(taskId)) == "" {
- xgTime := time.Unix(time.Now().Unix(), 0).Format("2006-01-02 15:04:05")
- f.SetSession(taskId, xgTime)
- }
- }
- copy := f.GetString("copy")
- if f.Method() == "GET" {
- code := util.Se.Decode4Hex(code)
- f.T["actiontext"] = "编辑"
- //lua, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
- lua, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- auth := qu.IntAll(f.GetSession("auth"))
- if qu.ObjToString((*lua)["createuserid"]) == f.GetSession("userid").(string) || auth >= 1 {
- if len(*lua) > 0 {
- if copy != "" {
- //luacopy, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": copy})
- luacopy, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": copy})
- if len(*luacopy) > 0 {
- (*lua)["model"] = (*luacopy)["model"]
- common_copy := (*luacopy)["param_common"].([]interface{})
- common := (*lua)["param_common"].([]interface{})
- common_copy[0] = common[0]
- common_copy[1] = common[1]
- common_copy[2] = common[2]
- common_copy[11] = common[11]
- (*lua)["param_common"] = (*luacopy)["param_common"]
- (*lua)["param_time"] = (*luacopy)["param_time"]
- (*lua)["param_list"] = (*luacopy)["param_list"]
- (*lua)["param_content"] = (*luacopy)["param_content"]
- (*lua)["str_list"] = (*luacopy)["str_list"]
- (*lua)["str_time"] = (*luacopy)["str_time"]
- (*lua)["str_content"] = (*luacopy)["str_content"]
- (*lua)["Thref"] = (*luacopy)["Thref"]
- (*lua)["Tpublishtime"] = (*luacopy)["Tpublishtime"]
- (*lua)["Ttitle"] = (*luacopy)["Ttitle"]
- (*lua)["Tdate"] = (*luacopy)["Tdate"]
- (*lua)["type_content"] = (*luacopy)["type_content"]
- (*lua)["type_list"] = (*luacopy)["type_list"]
- (*lua)["type_time"] = (*luacopy)["type_time"]
- }
- }
- if (*lua)["listcheck"] != nil {
- listcheck := (*lua)["listcheck"].(string)
- listcheck = strings.Replace(listcheck, "\\n", "\n", -1)
- listcheck = strings.Replace(listcheck, "\\", "", -1)
- (*lua)["listcheck"] = listcheck
- }
- if (*lua)["contentcheck"] != nil {
- contentcheck := (*lua)["contentcheck"].(string)
- contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1)
- contentcheck = strings.Replace(contentcheck, "\\", "", -1)
- (*lua)["contentcheck"] = contentcheck
- }
- js, _ := json.MarshalIndent((*lua)["model"], "", " ")
- (*lua)["js"] = string(js)
- f.T["lua"] = lua
- f.T["taskId"] = taskId
- f.T["restate"] = restate
- f.T["isflow"] = (*lua)["isflow"]
- f.T["spidertype"] = (*lua)["spidertype"]
- f.T["spidermovevent"] = (*lua)["spidermovevent"]
- f.T["spiderhistorymaxpage"] = (*lua)["spiderhistorymaxpage"]
- events := []string{}
- for k, _ := range util.Config.Uploadevents {
- events = append(events, k)
- }
- sort.Strings(events)
- f.T["events"] = events
- if (*lua)["oldlua"] != nil {
- return f.Render("oldedit.html", &f.T)
- }
- return f.Render("spideredit.html", &f.T)
- }
- } else {
- f.Write("您没有编辑他人脚本的权限")
- }
- }
- return nil
- }
- //查看某个爬虫
- func (f *Front) ViewSpider(id string) error {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth >= 1 {
- if f.Method() == "GET" {
- code := util.Se.Decode4Hex(id)
- f.T["actiontext"] = "编辑"
- //lua, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
- lua, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- if len(*lua) > 0 {
- if (*lua)["listcheck"] != nil {
- listcheck := (*lua)["listcheck"].(string)
- listcheck = strings.Replace(listcheck, "\\n", "\n", -1)
- listcheck = strings.Replace(listcheck, "\\", "", -1)
- (*lua)["listcheck"] = listcheck
- }
- if (*lua)["contentcheck"] != nil {
- contentcheck := (*lua)["contentcheck"].(string)
- contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1)
- contentcheck = strings.Replace(contentcheck, "\\", "", -1)
- (*lua)["contentcheck"] = contentcheck
- }
- js, _ := json.MarshalIndent((*lua)["model"], "", " ")
- (*lua)["js"] = string(js)
- f.T["lua"] = lua
- f.T["isflow"] = (*lua)["isflow"]
- f.T["spidertype"] = (*lua)["spidertype"]
- f.T["spidermovevent"] = (*lua)["spidermovevent"]
- f.T["spiderhistorymaxpage"] = (*lua)["spiderhistorymaxpage"]
- events := []string{}
- for k, _ := range util.Config.Uploadevents {
- events = append(events, k)
- }
- sort.Strings(events)
- f.T["events"] = events
- if (*lua)["oldlua"] != nil {
- return f.Render("oldedit.html", &f.T)
- }
- return f.Render("spiderview.html", &f.T)
- } else {
- f.Write("没有对应记录!")
- return nil
- }
- }
- return f.Redirect("/center")
- } else {
- f.Write("您没有查看他人脚本的权限")
- return nil
- }
- }
- func (f *Front) LoadModel(id string) error {
- if f.Method() == "GET" {
- //lua, _ := u.MgoE.Find("luaconfig", map[string]interface{}{"code": id}, nil, map[string]interface{}{"model": 1}, true, -1, -1)
- lua, _ := u.MgoEB.Find("luaconfig", map[string]interface{}{"code": id}, nil, map[string]interface{}{"model": 1}, true, -1, -1)
- if len(*lua) > 0 {
- f.ServeJson((*lua)[0])
- }
- }
- return f.Redirect("/center")
- }
- func (f *Front) SaveStep() {
- userid, _ := f.GetSession("userid").(string)
- auth := qu.IntAll(f.GetSession("auth"))
- rep := map[string]interface{}{}
- if f.GetString("oldlua") != "" {
- id := f.GetString("code")
- //one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": id})
- one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": id})
- id = qu.ObjToString((*one)["code"])
- script := f.GetStringComm("script")
- if strings.Index(script, id) == -1 {
- rep["msg"] = "code/名称都不能更改"
- f.ServeJson(rep)
- return
- } else {
- upset := map[string]interface{}{"luacontent": script}
- upset["modifytime"] = time.Now().Unix()
- //b := u.MgoE.Update("luaconfig", map[string]interface{}{"code": id}, map[string]interface{}{"$set": upset}, true, false)
- b := u.MgoEB.Update("luaconfig", map[string]interface{}{"code": id}, map[string]interface{}{"$set": upset}, true, false)
- if b {
- rep["msg"] = "保存成功"
- rep["code"] = util.Se.Encode2Hex(id)
- f.ServeJson(rep)
- return
- }
- }
- } else {
- if f.Base.SpiderName != "" && f.Base.SpiderCode != "" {
- code := f.Base.SpiderCode
- //one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": f.Base.SpiderCode})
- one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": f.Base.SpiderCode})
- //记录上架操作前的第一次保存时的爬虫历史
- user := f.GetSession("username").(string)
- LuaSaveLog(f.Base.SpiderCode, user, one, 0)
- state := qu.IntAllDef((*one)["state"], 0)
- restate := qu.IntAll((*one)["restate"])
- infoformat := qu.IntAll((*one)["infoformat"])
- comeintime := time.Now().Unix()
- if len((*one)) > 0 {
- comeintime = qu.Int64All((*one)["comeintime"])
- ouserid := qu.ObjToString((*one)["createuserid"])
- if ouserid != userid && auth == role_dev {
- f.Write("权限不够,不能修改他人脚本")
- return
- } else {
- code = qu.ObjToString((*one)["code"])
- f.Base.SpiderCode = code
- f.Base.SpiderName = ((*one)["param_common"].([]interface{}))[1].(string)
- }
- } else {
- if auth != role_admin {
- f.Write("不能新建爬虫,请联系管理员导入")
- return
- }
- }
- listcheck := f.GetString("listcheck")
- contentcheck := f.GetString("contentcheck")
- if auth == role_dev {
- //f.Base.SpiderStoreToMsgEvent = 4002
- }
- common := []interface{}{
- f.Base.SpiderCode,
- f.Base.SpiderName,
- f.Base.SpiderChannel,
- f.Base.SpiderDownDetailPage,
- f.Base.SpiderStartPage,
- f.Base.SpiderMaxPage,
- f.Base.SpiderRunRate,
- f.Base.Spider2Collection,
- f.Base.SpiderPageEncoding,
- f.Base.SpiderStoreMode,
- f.Base.SpiderStoreToMsgEvent,
- f.Base.SpiderTargetChannelUrl,
- f.Base.SpiderLastDownloadTime,
- f.Base.SpiderIsHistoricalMend,
- f.Base.SpiderIsMustDownload,
- }
- ptime := []interface{}{
- f.Step1.DateFormat,
- f.Step1.Address,
- f.Step1.ContentChooser,
- }
- list := []interface{}{
- f.Step2.Listadd,
- f.Step2.Listadds,
- f.Step2.BlockChooser,
- f.Step2.AddressChooser,
- f.Step2.TitleChooser,
- f.Step2.DateChooser,
- f.Step2.DateFormat,
- }
- content := []interface{}{
- f.Step3.ContentChooser,
- f.Step3.ElementChooser,
- }
- param := map[string]interface{}{}
- common[4] = 1
- param["param_common"] = common
- param["channel"] = f.Base.SpiderChannel
- param["href"] = f.Base.SpiderTargetChannelUrl
- //向导模式
- param["param_time"] = ptime
- param["param_list"] = list
- param["param_content"] = content
- param["type_time"] = f.Step1.Types
- param["type_list"] = f.Step2.Types
- param["type_content"] = f.Step3.Types
- //专家模式
- param["str_time"] = f.Step1.Expert
- param["str_list"] = f.Step2.Expert
- param["str_content"] = f.Step3.Expert
- param["comeintime"] = comeintime
- listcheck = strings.Replace(listcheck, "\n", "\\\\n", -1)
- param["listcheck"] = strings.Replace(listcheck, "\"", "\\\\\"", -1)
- contentcheck = strings.Replace(contentcheck, "\n", "\\\\n", -1)
- param["contentcheck"] = strings.Replace(contentcheck, "\"", "\\\\\"", -1)
- //补充模型
- s_model := f.GetString("model")
- configModel := util.Config.Model[s_model]
- model := map[string]interface{}{}
- for k, _ := range configModel {
- model[k] = f.GetString(k)
- }
- model["model"] = s_model
- param["code"] = f.Base.SpiderCode
- param["model"] = model
- if len((*one)) > 0 {
- param["createuser"] = (*one)["createuser"]
- param["createuserid"] = (*one)["createuserid"]
- param["code"] = (*one)["code"]
- //开发员关联任务修改爬虫状态
- state = qu.IntAll((*one)["state"])
- if auth == role_dev && state >= Sp_state_3 && restate != 1 { //开发员修改,已经审核通过(不包含已上架),状态重置为待完成(restate!=1判断,重采修改保存爬虫时不修改爬虫状态)
- param["state"] = 0
- } else {
- param["state"] = state
- }
- } else {
- param["createuser"] = f.GetSession("loginuser")
- param["createuserid"] = f.GetSession("userid")
- param["createuseremail"] = f.GetSession("email")
- param["next"] = f.GetSession("email")
- param["state"] = 0
- }
- if qu.ObjToString((*one)["modifyuser"]) == "" {
- param["modifyuser"] = param["createuser"]
- param["modifyuserid"] = param["createuserid"]
- }
- param["modifytime"] = time.Now().Unix()
- param["Ttitle"] = f.Step3.T_title
- param["Thref"] = f.Step3.T_href
- param["Tdate"] = f.Step3.T_date
- //其他信息
- param["isflow"] = f.OtherBase.IsFlow
- param["spidertype"] = f.OtherBase.SpiderType
- param["spiderhistorymaxpage"] = f.OtherBase.SpiderHistoryMaxPage
- qu.Debug(f.OtherBase.SpiderMoveEvent)
- tmpEvent, err := strconv.Atoi(f.OtherBase.SpiderMoveEvent) //f.OtherBase.SpiderMoveEvent此处SpiderMoveEvent已不表示comm、bid,表示增量的节点
- if f.OtherBase.SpiderType == "history" { //爬虫类型是history的放到7000节点,并记录历史节点
- param["event"] = 7000
- if err == nil {
- param["incrementevent"] = tmpEvent //开发人员切换增量节点
- } else if event := qu.IntAll((*one)["event"]); event != 7000 { //默认增量节点
- param["incrementevent"] = event
- }
- } else if f.OtherBase.SpiderType == "increment" && err == nil { //增量
- param["event"] = tmpEvent //开发人员切换增量节点
- }
- if movevent, ok := util.Config.Uploadevents[f.OtherBase.SpiderMoveEvent].(string); ok && movevent != "" {
- param["spidermovevent"] = movevent
- }
- //开发人员修改爬虫节点后,在审核人员上架时,要在原来的节点下架,临时记录要下架的节点downevent
- if event := qu.IntAll((*one)["event"]); event != tmpEvent && event != 7000 {
- param["downevent"] = event
- }
- //三级页复制
- param["str_recontent"] = f.StepRe3.Expert
- param["iscopycontent"] = f.StepRe3.Checked
- //
- param["listisfilter"] = ListFilterReg.MatchString(f.Step2.Expert) //列表页校验是否含“--关键词过滤”
- matchLua, msg := LuaTextCheck(f.Step2.Expert, f.Step3.Expert, f.Step2.Types, infoformat, model)
- if !matchLua {
- issave := spider.SaveSpider(code, param) //保存脚本
- if issave {
- for k, v := range *one {
- if k != "_id" && param[k] == nil {
- param[k] = v
- }
- }
- Wlog(f.Base.SpiderName, f.Base.SpiderCode, user, f.GetSession("userid").(string), "修改", param)
- rep["msg"] = "保存成功"
- } else {
- rep["msg"] = "保存失败"
- }
- } else {
- rep["msg"] = "保存失败," + msg
- }
- rep["code"] = util.Se.Encode2Hex(code)
- f.ServeJson(rep)
- }
- }
- }
- func (f *Front) SaveChannels() {
- code := f.GetString("code")
- channels := f.GetString("channels")
- channels = strings.ReplaceAll(channels, ",", ",")
- arr := strings.Split(channels, ",")
- ok := u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{
- "$addToSet": map[string]interface{}{
- "channels": map[string]interface{}{"$each": arr},
- },
- }, false, false)
- f.ServeJson(map[string]interface{}{"ok": ok})
- }
- func (f *Front) SaveJs() {
- //param_type := f.GetString("param_type")
- //return_type := f.GetString("return_type")
- js_name := f.GetString("js_name")
- js_alias := f.GetString("js_alias")
- jstext := f.GetString("jstext")
- step := f.GetString("step")
- code := f.GetString("code")
- update := map[string]interface{}{
- "runjs": true,
- }
- js := map[string]interface{}{
- "js_text": jstext,
- "js_name": js_name,
- "js_step": step,
- "js_alias": js_alias,
- "js_param": "string",
- "js_return": "string",
- }
- if step == "list" {
- update["js_list"] = js
- } else if step == "detail" {
- update["js_detail"] = js
- }
- u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": update}, false, false)
- f.ServeJson(map[string]interface{}{"ok": true})
- }
- func LuaSaveLog(code, user string, data *map[string]interface{}, stype int) {
- saveOne, _ := u.MgoEB.FindOne("luasavelog", map[string]interface{}{"state": 0, "code": code})
- if stype == 0 { //保存记录
- if len(*saveOne) == 0 && len(*data) > 0 { //重新记录
- delete(*data, "_id")
- save := map[string]interface{}{
- "code": code,
- "state": 0,
- "saveuser": user,
- "comeintime": time.Now().Unix(),
- "luaold": data,
- }
- u.MgoEB.Save("luasavelog", save)
- }
- } else if stype == 1 { //对比
- if len(*saveOne) > 0 {
- tmp := (*saveOne)["luaold"].(map[string]interface{})
- updateMap := map[string]interface{}{} //记录字段改变值
- for k, v := range *data {
- if k != "_id" && k != "state" && k != "modifytime" {
- if tmpV := tmp[k]; tmpV != nil { //历史记录存在字段
- tmpJson, _ := json.Marshal(tmpV)
- dataJson, _ := json.Marshal(v)
- if string(tmpJson) != string(dataJson) {
- updateMap[k] = v
- }
- delete(tmp, k) //删除对比过的字段
- } else { //历史记录不存在字段
- updateMap[k] = v
- }
- }
- }
- if len(tmp) > 0 {
- for k, _ := range tmp { //上架时爬虫较历史爬虫少的字段信息
- updateMap[k] = nil
- }
- }
- set := map[string]interface{}{"state": 1, "updatetime": time.Now().Unix(), "updateuser": user}
- if len(updateMap) > 0 { //有字段改变
- set["luaupdate"] = updateMap
- set["lusnew"] = data
- }
- u.MgoEB.UpdateById("luasavelog", (*saveOne)["_id"], map[string]interface{}{"$set": set})
- }
- }
- }
- //爬虫保存时,检查列表页和三级页代码中是否含lua原生方法
- func LuaTextCheck(list, detail string, type_list, infoformat int, model map[string]interface{}) (b bool, msg string) {
- defer qu.Catch()
- if LuaReg.MatchString(list) || LuaReg.MatchString(detail) {
- msg = "代码中含有lua原生方法;"
- }
- if ListFilterReg.MatchString(detail) && !strings.Contains(detail, "delete") { //三级页含过滤但是没有data["delete"]="true"
- msg = `三级页缺少data["delete"]="true"`
- }
- sln_reg := regexp.MustCompile(`sendListNum\(pageno,list\)`)
- slnIndexArr := sln_reg.FindAllStringIndex(list, -1)
- if type_list != 0 && len(slnIndexArr) == 0 { //列表页专家模式且不含sendListNum
- msg = "代码中缺少sendListNum(pageno,list)方法;" + msg
- } else if type_list == 1 && len(slnIndexArr) > 0 { //判断sendListNum方法的位置
- trim_reg := regexp.MustCompile("trim")
- insert_reg := regexp.MustCompile("insert")
- trIndexArr := trim_reg.FindAllStringIndex(list, -1)
- irIndexArr := insert_reg.FindAllStringIndex(list, -1)
- slIndex := slnIndexArr[len(slnIndexArr)-1] //sendListNum位置
- trIndex := trIndexArr[len(trIndexArr)-1] //com.trim位置
- irIndex := irIndexArr[len(irIndexArr)-1] //insert位置
- qu.Debug("sendListNum位置:", trIndex, slIndex, irIndex)
- if slIndex[1] < trIndex[0] || slIndex[0] > irIndex[1] { //sendListNum方法必须在com.trim方法后,table.insert方法前
- msg = "sendListNum方法位置错误;" + msg
- }
- }
- if type_list == 1 {
- area := qu.ObjToString(model["area"])
- city := qu.ObjToString(model["city"])
- district := qu.ObjToString(model["district"])
- if area != "" && !strings.Contains(list, area) {
- msg += "省份信息与模板不一致;"
- }
- if city != "" && !strings.Contains(list, city) {
- msg += "城市信息与模板不一致;"
- }
- if district != "" && !strings.Contains(list, district) {
- msg += "区/县信息与模板不一致;"
- }
- if infoformat == 2 && !strings.Contains(detail, "projectname") {
- msg += "拟建/审批数据缺少projectname字段;"
- }
- }
- b = msg != ""
- return
- }
- //方法测试
- func (f *Front) RunStep() {
- imodal, _ := f.GetInteger("imodal")
- script, _ := f.GetBool("script")
- listcheck := f.GetString("listcheck")
- contentcheck := f.GetString("contentcheck")
- downloadnode := f.GetString("downloadnode") //下载节点
- common := []interface{}{
- f.Base.SpiderCode,
- f.Base.SpiderName,
- f.Base.SpiderChannel,
- f.Base.SpiderDownDetailPage,
- f.Base.SpiderStartPage,
- f.Base.SpiderMaxPage,
- f.Base.SpiderRunRate,
- f.Base.Spider2Collection,
- f.Base.SpiderPageEncoding,
- f.Base.SpiderStoreMode,
- f.Base.SpiderStoreToMsgEvent,
- f.Base.SpiderTargetChannelUrl,
- f.Base.SpiderLastDownloadTime,
- f.Base.SpiderIsHistoricalMend,
- f.Base.SpiderIsMustDownload,
- "",
- "",
- "",
- }
- if f.Method() == "POST" {
- switch f.GetString("step") {
- case "step1": //publishtime
- ptime := []interface{}{
- f.Step1.DateFormat,
- f.Step1.Address,
- f.Step1.ContentChooser,
- }
- if script {
- _, scripts := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal, 1)
- f.ServeJson(scripts)
- return
- }
- rs, err := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal)
- if err == nil {
- f.ServeJson(rs)
- }
- case "step2": //list
- addrs := strings.Split(f.Step2.Listadds, "\n")
- if len(addrs) > 0 {
- for k, v := range addrs {
- addrs[k] = "'" + v + "'"
- }
- f.Step2.Listadds = strings.Join(addrs, ",")
- } else if len(f.Step2.Listadds) > 5 {
- f.Step2.Listadds = "'" + f.Step2.Listadds + "'"
- } else {
- f.Step2.Listadds = ""
- }
- list := []interface{}{
- f.Step2.Listadd,
- f.Step2.Listadds,
- f.Step2.BlockChooser,
- f.Step2.AddressChooser,
- f.Step2.TitleChooser,
- f.Step2.DateChooser,
- f.Step2.DateFormat,
- }
- listcheck = strings.Replace(listcheck, "\n", "\\n", -1)
- listcheck = strings.Replace(listcheck, "\"", "\\\"", -1)
- s_model := f.GetString("model")
- configModel := util.Config.Model[s_model]
- model := map[string]interface{}{}
- for k, _ := range configModel {
- model[k] = f.GetString(k)
- }
- if script {
- _, script := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal, 1)
- f.ServeJson(script)
- return
- }
- rs, err := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal)
- if err == nil {
- f.ServeJson(rs)
- } else if err.(error).Error() == "no" {
- f.ServeJson(rs[0])
- }
- case "step3": //detail
- content := []interface{}{
- f.Step3.ContentChooser,
- f.Step3.ElementChooser,
- }
- contentcheck = strings.Replace(contentcheck, "\n", "\\n", -1)
- contentcheck = strings.Replace(contentcheck, "\"", "\\\"", -1)
- data := map[string]interface{}{}
- data["title"] = f.Step3.T_title
- data["href"] = f.Step3.T_href
- data["publishtime"] = f.Step3.T_date
- if script {
- _, script := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal, 1)
- f.ServeJson(script)
- return
- }
- rs, err := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal)
- if projectinfo, ok := rs["projectinfo"].(map[string]interface{}); ok && projectinfo != nil {
- if attachments, ok := projectinfo["attachments"].(map[string]interface{}); ok && attachments != nil {
- for _, tmp := range attachments {
- tmpMap := tmp.(map[string]interface{})
- if qu.ObjToString(tmpMap["filename"]) == "附件中含有乱码" {
- rs["msg"] = "附件中含有乱码"
- }
- }
- }
- }
- if err == nil {
- f.ServeJson(rs)
- } else {
- f.ServeJson(rs["no"])
- }
- }
- }
- }
- //爬虫测试数据json
- func (f *Front) GetJson() {
- code := f.GetString("code")
- username := f.GetSession("username").(string)
- if tr := TestResultMap[username+code]; tr != nil {
- task, _ := u.MgoEB.FindOne("task", map[string]interface{}{"code": code, "i_state": 3})
- comeintime := int64(0)
- if len(*task) > 0 {
- comeintime = qu.Int64All((*task)["l_comeintime"])
- tr.task_remark = "审核任务创建时间:" + qu.FormatDateByInt64(&comeintime, qu.Date_Short_Layout) + ";" + tr.task_remark
- }
- data := tr.dataInfo
- result := tr.listInfo
- if len(data) > 0 {
- data["contenthtml"] = ""
- }
- num := 0
- list_fir := []map[string]interface{}{}
- list_sec := []map[string]interface{}{}
- for page, list := range result {
- for k, v := range list {
- v["a_index"] = k + 1
- num++
- }
- if page == 1 {
- list_fir = list
- } else if page == 2 {
- list_sec = list
- }
- }
- f.T["list_fir"] = list_fir
- f.T["list_sec"] = list_sec
- f.T["data"] = data
- f.T["num"] = num
- f.T["descript"] = tr.task_descript
- f.T["remark"] = tr.task_remark
- f.T["rateremark"] = tr.task_rateremark
- f.T["reason"] = tr.reason
- f.T["msg"] = tr.msg
- f.T["comeintime"] = comeintime
- delete(TestResultMap, username+code)
- }
- f.Render("jsonInfo.html", &f.T)
- }
// TestResultMap caches the latest whole-spider test result per user and
// spider; the key is username+code. SpiderPass writes an entry and GetJson
// reads and removes it.
// NOTE(review): this is a plain map with no visible locking — confirm the
// handlers using it cannot run concurrently.
var TestResultMap = make(map[string]*TestResult)

// TestResult is the outcome of a whole-spider test run.
type TestResult struct {
	task_remark               string
	task_rateremark           []string
	task_descript, reason, msg string
	// listInfo holds the list items keyed by an int64 that GetJson treats as
	// the page number (1 and 2 are shown).
	listInfo map[int64][]map[string]interface{}
	// dataInfo is the record downloaded from the sampled detail page.
	dataInfo map[string]interface{}
}
- //整体测试
- func (f *Front) SpiderPass() {
- defer mu.Catch()
- tr := &TestResult{}
- result := map[int64][]map[string]interface{}{}
- data := map[string]interface{}{}
- msgArr := []string{}
- code := f.GetString("code")
- downloadnode := f.GetString("node")
- //根据code查询待确认任务
- query := map[string]interface{}{
- "s_code": code,
- "i_state": 3,
- }
- task, _ := u.MgoEB.FindOne("task", query)
- descript := "null"
- remark := "null"
- remarktmp := []string{}
- rateremarktmp := []string{}
- if len(*task) > 0 {
- descript = (*task)["s_descript"].(string)
- if mrecord, ok := (*task)["a_mrecord"].([]interface{}); ok {
- for _, m := range mrecord {
- remarkInfo := m.(map[string]interface{})
- if remark := qu.ObjToString(remarkInfo["s_mrecord_remark"]); remark != "" {
- remarktmp = append(remarktmp, remark+";")
- }
- if rateremark := qu.ObjToString(remarkInfo["s_mrecord_rateremark"]); rateremark != "" {
- rateremarktmp = append(rateremarktmp, rateremark+";")
- }
- }
- }
- }
- if len(remarktmp) > 0 {
- remark = ""
- remark = strings.Join(remarktmp, "")
- }
- tr.task_remark = remark
- tr.task_rateremark = rateremarktmp
- tr.task_descript = descript
- //基本信息、方法一(发布时间)、方法二(列表页)、方法三(详情页)、总请求次数、go方法一、go方法二、go方法三、列表页条数
- steps := []interface{}{false, false, false, false, 0, 0, 0, 0, 0}
- one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- reason, _ := (*one)["reason"].(string)
- tr.reason = reason
- if len(*one) > 0 && (*one)["oldlua"] == nil {
- common := (*one)["param_common"].([]interface{})
- if len(common) < 13 {
- f.ServeJson(steps)
- return
- } else {
- steps[0] = true
- }
- } else {
- steps[0] = true
- }
- script, liststr, contentstr := "", "", ""
- if (*one)["oldlua"] == nil {
- script, liststr, contentstr = spider.GetScript(code)
- } else {
- script = (*one)["luacontent"].(string)
- }
- if liststr != "" && contentstr != "" {
- msgArr = u.SpiderPassCheckLua(liststr, contentstr, (*one)) //校验
- }
- s := spider.CreateSpider(downloadnode, script)
- s.SpiderMaxPage = 2 //采集列表页总页数
- s.Timeout = 60
- timestr, timeerr := s.GetLastPublishTime()
- if timeerr == nil && len(timestr) > 4 {
- steps[1] = true //发布时间获取成功
- downloadNum := 0
- result, downloadNum, _ = s.DownListPageItem() //列表页采集结果
- if downloadNum == 0 {
- f.ServeJson(steps)
- return
- }
- steps[2] = true //列表页获取成功
- steps[8] = downloadNum //下载量
- tr.listInfo = result
- if s.DownDetail {
- onePageList := result[1] //第一页数据
- if onePageDataNum := len(onePageList); onePageDataNum > 0 {
- index := onePageDataNum / 2 //取一条数据下载三级页
- param := map[string]string{}
- for k, v := range onePageList[index] {
- param[k] = qu.ObjToString(v)
- }
- data = map[string]interface{}{}
- s.DownloadDetailPage(param, data)
- tr.dataInfo = data
- if len(data) == 0 || qu.ObjToString(data["detail"]) == "" {
- steps[3] = false //详情页获取失败
- } else {
- steps[3] = true //详情页获取成功
- }
- }
- } else {
- steps[3] = true //详情页获取成功
- }
- //list, _ = s.DownListPageItem()
- //for _, l := range list {
- // if publishtime := qu.ObjToString(l["publishtime"]); publishtime == "0" || publishtime == "" {
- // msgArr = append(msgArr, "列表页publishtime取值异常")
- // break
- // } else {
- // t, err := time.ParseInLocation(qu.Date_Full_Layout, publishtime, time.Local)
- // if err != nil || t.Unix() <= 0 {
- // msgArr = append(msgArr, "列表页publishtime取值异常")
- // break
- // }
- // }
- //}
- //if len(list) > 0 {
- // tr.listInfo = list
- // listone := list[0]
- // if len(qu.ObjToString(listone["href"])) < 7 ||
- // (qu.ObjToString(listone["publishtime"]) != "0" && len(qu.ObjToString(listone["publishtime"])) < 5) ||
- // len(qu.ObjToString(listone["title"])) < 3 {
- // f.ServeJson(steps)
- // return
- // } else {
- // steps[2] = true
- // if s.DownDetail {
- // param := map[string]string{}
- // index := 0
- // if len(list) > 0 {
- // steps[8] = len(list)
- // index = len(list) / 2
- // for k, v := range list[index] {
- // param[k] = qu.ObjToString(v)
- // }
- // data = map[string]interface{}{}
- // s.DownloadDetailPage(param, data)
- // if len(data) > 0 {
- // tr.dataInfo = data
- // }
- // if len(data) == 0 || data["detail"].(string) == "" {
- // steps[3] = false
- // } else {
- // steps[3] = true
- // }
- // }
- // } else {
- // steps[3] = true
- // }
- // }
- //}
- }
- //关闭laustate
- s.L.Close()
- steps[4] = s.Test_luareqcount
- steps[5] = s.Test_goreqtime
- steps[6] = s.Test_goreqlist
- steps[7] = s.Test_goreqcon
- //校验
- msg := u.SpiderPassCheckListAndDetail(result, data)
- msgArr = append(msgArr, msg...)
- username := f.GetSession("username").(string)
- tr.msg = strings.Join(msgArr, ";")
- TestResultMap[username+code] = tr
- f.ServeJson(steps)
- }
- func (f *Front) DownSpider(code string) {
- auth := qu.IntAll(f.GetSession("auth"))
- user := f.GetSession("loginuser")
- success := false
- script := ""
- if auth > role_dev {
- success = true
- //one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
- one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- createuserid := qu.ObjToString((*one)["createuserid"])
- filename := code + ".lua"
- if len(*one) > 0 {
- if (*one)["oldlua"] != nil {
- if (*one)["luacontent"] != nil {
- script = (*one)["luacontent"].(string)
- }
- } else {
- user, _ := u.MgoEB.FindById("user", createuserid, nil)
- name := (*one)["createuser"]
- email := (*user)["s_email"]
- upload := time.Now().Format("2006-01-02 15:04:05")
- script, _, _ = spider.GetScript(code, name, email, upload)
- }
- }
- f.ResponseWriter.Header().Del("Content-Type")
- f.ResponseWriter.Header().Add("Content-Type", "application/x-download")
- f.ResponseWriter.Header().Add("Content-Disposition", "attachment;filename=spider_"+filename)
- f.WriteBytes([]byte(script))
- } else {
- f.Write("您没有权限")
- }
- //记录日志
- downlogs := map[string]interface{}{
- "code": code,
- "user": user,
- "auth": auth,
- "time": time.Now().Unix(),
- "success": success,
- "script": script,
- }
- u.MgoEB.Save("luadownlogs", downlogs)
- }
- //下架删除心跳
- func DelSpiderHeart(code string) bool {
- return u.MgoS.Update("spider_heart", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"del": true}}, false, true)
- }
- //下架删除download数据
- //func delDownloadData(code string) bool {
- // return mgu.Del("download", "spider", "spider", `{"code":"`+code+`"}`)
- //}
- //批量作废删除download数据
- //func disableDelDownloadData(code []string) {
- // for _, v := range code {
- // flag := delDownloadData(v)
- // log.Println(code, "---批量删除download数据:", flag)
- // }
- //}
- //爬虫核对
- func (f *Front) Checktime() {
- code := f.GetString("code")
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != role_admin {
- f.ServeJson(false)
- } else {
- //b := u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{
- // "l_checktime": time.Now().Unix(),
- //}}, true, false)
- b := u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{
- "l_checktime": time.Now().Unix(),
- }}, true, false)
- f.ServeJson(b)
- }
- }
- //批量作废
- func (f *Front) Disables() error {
- auth := qu.IntAll(f.GetSession("auth"))
- names := strings.Split(f.GetString("names"), ",")
- ids := strings.Split(f.GetString("ids"), ",")
- codes := strings.Split(f.GetString("codes"), ",")
- disablereason := f.GetString("disablereason")
- res := ""
- if IsHasUpState(auth, Sp_state_4) {
- for k, id := range ids {
- b, err := UpStateAndUpSpider("", id, disablereason, "", Sp_state_4)
- if b { //作废成功
- //修改任务状态
- UpTaskState(codes, 4, "", int64(0))
- //删除download表数据
- //go disableDelDownloadData(codes)
- if err != nil {
- res = res + names[k] + ",ok" + qu.ObjToString(err.Error()) + ";"
- } else {
- res = res + names[k] + ",ok" + ";"
- }
- } else {
- res = res + names[k] + "," + qu.ObjToString(err.Error()) + ";"
- }
- }
- } else {
- res = "没有权限"
- }
- f.ServeJson(res)
- return nil
- }
- //批量上下架
- func (f *Front) BatchShelves() {
- codes := strings.Split(f.GetString("codes"), ",")
- state, _ := f.GetInteger("state")
- auth := qu.IntAll(f.GetSession("auth"))
- errCode := []string{}
- var err error
- b := false
- if IsHasUpState(auth, Sp_state_5) {
- if state == 5 { //批量上架
- for _, code := range codes {
- _, err = UpStateAndUpSpider(code, "", "", "", Sp_state_5)
- if err != nil {
- errCode = append(errCode, code)
- }
- }
- } else { //批量下架
- for _, code := range codes {
- b, err = UpStateAndUpSpider(code, "", "", "", Sp_state_6)
- if !b || err != nil {
- errCode = append(errCode, code)
- }
- //下架删除download数据
- //if b {
- // flag := delDownloadData(code)
- // log.Println(code, "---删除download数据:", flag)
- //}
- }
- }
- } else {
- errCode = append(errCode, "没有权限")
- }
- f.ServeJson(errCode)
- }
- //更新爬虫状态
- func (f *Front) UpState() error {
- username := f.GetSession("username").(string)
- code := f.GetString("code")
- state, _ := f.GetInt("state")
- id := f.GetString("taskId")
- reason := f.GetString("reason")
- auth := qu.IntAll(f.GetSession("auth"))
- var codeArr = []string{code}
- var taskid []string
- //修改任务状态
- istotask := false
- res := map[string]interface{}{
- "istotask": istotask,
- "err": "没有权限",
- "code": util.Se.Encode2Hex(code),
- "taskid": taskid,
- }
- var xgTime int64
- if f.GetSession(id) == nil || f.GetSession(id) == "" {
- xgTime = time.Now().Unix()
- } else {
- xgTimeStr := qu.ObjToString(f.GetSession(id))
- xgTimeTmp, _ := time.ParseInLocation("2006-01-02 15:04:05", xgTimeStr, time.Local)
- xgTime = xgTimeTmp.Unix()
- }
- f.DelSession(id)
- if IsHasUpState(auth, int(state)) {
- b, err := UpStateAndUpSpider(code, "", reason, username, int(state)) //更新爬虫状态
- if b && state == Sp_state_1 { //提交审核
- //有对应任务跳转提交记录页
- taskid = checkTask(codeArr, 1)
- if len(taskid) > 0 {
- res["istotask"] = true
- res["taskid"] = taskid[0]
- }
- } else if b && state == Sp_state_2 { //打回
- taskid = checkTask(codeArr, 2)
- if len(taskid) > 0 {
- //UpTaskState([]string{taskid}, 2) //修改状态
- UpTaskState(taskid, 2, "", int64(0)) //修改任务状态
- SaveRemark(taskid, reason, username) //保存记录信息
- }
- } else if b && state == Sp_state_3 { //审核通过
- taskid = checkTask(codeArr, 3)
- if len(taskid) > 0 {
- //UpTaskState([]string{taskid}, 3)
- UpTaskState(taskid, 3, "", int64(0))
- SaveRemark(taskid, "", username)
- }
- } else if b && state == Sp_state_6 { //下架
- //下架成功删除心跳数据
- flag := DelSpiderHeart(code)
- log.Println(code, "---下架删除download数据:", flag)
- } else if b && state == Sp_state_7 { //反馈
- taskid = checkTask(codeArr, 7)
- if len(taskid) > 0 {
- UpTaskState(taskid, 7, reason, xgTime)
- }
- }
- if err != nil {
- res["err"] = err.Error()
- f.ServeJson(res)
- } else {
- res["err"] = ""
- f.ServeJson(res)
- }
- } else {
- f.ServeJson(res)
- }
- return nil
- }
- func (f *Front) Assort() {
- state, _ := f.GetInteger("state")
- code := f.GetString("code")
- codes := u.SymbolReg.Split(code, -1)
- success := true
- msg := ""
- for _, code := range codes {
- query := map[string]interface{}{
- "code": code,
- }
- //下架爬虫
- //lua, _ := u.MgoE.FindOne("luaconfig", query)
- lua, _ := u.MgoEB.FindOne("luaconfig", query)
- upresult, err := spider.UpdateSpiderByCodeState(code, "6", qu.IntAll((*lua)["event"]))
- qu.Debug("下架爬虫:", code, upresult, err)
- if upresult && err == nil {
- //更新爬虫状态
- update := map[string]interface{}{
- "$set": map[string]interface{}{
- "state": state,
- //"modifytime": time.Now().Unix(),
- "l_uploadtime": time.Now().Unix(),
- },
- }
- //u.MgoE.Update("luaconfig", query, update, false, false)
- u.MgoEB.Update("luaconfig", query, update, false, false)
- //关闭任务
- query = map[string]interface{}{
- "s_code": code,
- }
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 6,
- },
- }
- u.MgoEB.Update("task", query, update, false, true)
- //删除心跳
- DelSpiderHeart(code)
- } else {
- success = false
- msg += code + ";"
- }
- }
- f.ServeJson(map[string]interface{}{"success": success, "msg": msg})
- }
- //更新爬虫状态,并判断是否更新节点爬虫
- func UpStateAndUpSpider(code, id, reason, username string, state int) (bool, error) {
- upresult := false
- var err error
- one := &map[string]interface{}{}
- if code != "" {
- //one, _ = u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
- one, _ = u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- } else {
- //one, _ = u.MgoE.FindById("luaconfig", id, nil)
- one, _ = u.MgoEB.FindById("luaconfig", id, nil)
- code = qu.ObjToString((*one)["code"])
- }
- if len(*one) > 0 {
- var event int
- unset := map[string]interface{}{}
- if (*one)["event"] != nil {
- event = qu.IntAll((*one)["event"])
- } else {
- for k, _ := range util.Config.Uploadevents { //?
- event = qu.IntAll(k)
- break
- }
- //r := rand.New(rand.NewSource(time.Now().UnixNano()))
- //event = util.Config.Uploadevents[r.Intn(len(util.Config.Uploadevents))]
- }
- //oldstate := qu.IntAll(one["state"])
- switch state {
- case Sp_state_4, Sp_state_6: //作废、下架
- // if oldstate == Sp_state_5 {
- // upresult = false
- // err = errors.New("已上架不允许作废")
- // } else {
- // upresult = true
- // }
- upresult, err = spider.UpdateSpiderByCodeState(code, fmt.Sprint(state), event) //下架
- qu.Debug("下架:", upresult, code)
- case Sp_state_5: //上架(爬虫端在更新上架的时候为了更新内存中字段,采用先下架上架)
- if downevent := qu.IntAll((*one)["downevent"]); downevent != 0 { //爬虫开发修改爬虫节点,审核人员上架爬虫时,原来爬虫所在节点下架
- upresult, err = spider.UpdateSpiderByCodeState(code, "6", downevent)
- if upresult && err == nil {
- unset = map[string]interface{}{"downevent": ""}
- }
- }
- upresult, err = spider.UpdateSpiderByCodeState(code, "6", event)
- qu.Debug("下架:", upresult, code)
- if upresult && err == nil {
- upresult, err = spider.UpdateSpiderByCodeState(code, fmt.Sprint(state), event)
- qu.Debug("上架:", upresult, code)
- }
- default:
- upresult = true
- err = nil
- }
- if err != nil && strings.Contains(err.Error(), "timeout") {
- err = errors.New("连接节点" + fmt.Sprint(event) + "超时")
- upresult = true
- }
- if upresult && err == nil {
- upset := map[string]interface{}{"state": state} //修改状态
- if (*one)["oldlua"] != nil { //老脚本上传
- //upresult = u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, true, false)
- up := map[string]interface{}{
- "$set": upset,
- }
- if len(unset) > 0 {
- up["$unset"] = unset
- }
- upresult = u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, up, true, false)
- } else {
- if state == Sp_state_1 { //提交审核
- upset["l_complete"] = time.Now().Unix()
- upset["report"] = ""
- } else if state == Sp_state_7 { //反馈问题
- upset["report"] = reason
- upset["state"] = 1 //反馈后爬虫改为待审核
- upset["l_complete"] = time.Now().Unix()
- } else if state == Sp_state_3 { //审核通过
- if (*one)["event"] == nil {
- upset["event"] = event
- //upset["modifytime"] = time.Now().Unix()
- }
- upset["frequencyerrtimes"] = 0 //爬虫审核通过,重置采集频率异常次数
- upset["l_uploadtime"] = time.Now().Unix()
- } else if state == Sp_state_2 { //打回原因
- upset["reason"] = reason
- } else if state == Sp_state_5 { //上架,核对时间重置
- upset["l_checktime"] = 0
- LuaSaveLog(code, username, one, 1)
- } else if state == Sp_state_4 { //作废,作废原因
- upset["disablereason"] = reason
- //upset["modifytime"] = time.Now().Unix()
- upset["l_uploadtime"] = time.Now().Unix() //l_complete爬虫完成时间
- }
- up := map[string]interface{}{
- "$set": upset,
- }
- if len(unset) > 0 {
- up["$unset"] = unset
- }
- //upresult = u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, false, false)
- upresult = u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, up, false, false)
- qu.Debug("提交日志:", code, upset, upresult)
- if upresult && (state == Sp_state_2 || state == Sp_state_3) { //打回、审核记录日志
- types := "打回"
- if state == Sp_state_3 {
- types = "审核"
- }
- event := qu.IntAll((*one)["event"])
- obj := map[string]interface{}{
- "code": code,
- "auditor": username,
- "types": types,
- "comeintime": time.Now().Unix(),
- "reason": reason,
- "spideruser": (*one)["createuser"],
- "modifytime": (*one)["modifytime"],
- "event": event,
- "site": (*one)["site"],
- "channel": (*one)["channel"],
- }
- if !strings.HasSuffix(code, u.Bu) { //凡是以_bu结尾的爬虫一律不计入审核记录
- //新爬虫审核记录表
- if event == 7000 && (state == Sp_state_3 || state == Sp_state_2) {
- count := u.MgoEB.Count("lua_logs_auditor", map[string]interface{}{"code": code, "types": "审核"})
- if count == 0 { //新爬虫审核记录
- u.MgoEB.Save("lua_logs_auditor_new", obj)
- }
- }
- u.MgoEB.Save("lua_logs_auditor", obj) //历史维护爬虫审核记录
- }
- }
- }
- }
- }
- return upresult, err
- }
- //保存记录信息
- func SaveRemark(taskid []string, reason, username string) {
- timeNow := time.Now().Unix()
- if reason == "" {
- reason = "审核通过"
- }
- for _, id := range taskid {
- task, _ := u.MgoEB.FindById("task", id, nil)
- if task != nil && len(*task) > 0 {
- checkData := (*task)["a_check"]
- var checkArr []map[string]interface{}
- newData := make(map[string]interface{})
- newData["s_check_checkUser"] = username
- newData["l_check_checkTime"] = timeNow
- newData["s_check_checkRemark"] = reason
- if checkData != nil {
- myArr := qu.ObjArrToMapArr(checkData.([]interface{}))
- if myArr != nil && len(myArr) > 0 {
- for _, v := range myArr {
- checkArr = append(checkArr, v)
- }
- }
- }
- checkArr = append(checkArr, newData)
- (*task)["a_check"] = checkArr
- u.MgoEB.UpdateById("task", id, map[string]interface{}{"$set": &task})
- }
- }
- }
- //修改任务状态
- func UpTaskState(code []string, num int, reason string, startTime int64) {
- query := map[string]interface{}{}
- update := map[string]interface{}{}
- for _, v := range code {
- if num == 1 || num == 2 || num == 3 || num == 7 { //id
- query = map[string]interface{}{
- "_id": mongodb.StringTOBsonId(v),
- }
- } else {
- query = map[string]interface{}{ //code
- "s_code": v,
- }
- }
- if num == 1 { //提交审核
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 3,
- },
- }
- } else if num == 2 { //打回 -->未通过
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 5,
- "l_updatetime": time.Now().Unix(),
- },
- }
- } else if num == 3 { //发布(审核通过) -->审核通过
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 4,
- "l_updatetime": time.Now().Unix(),
- "l_uploadtime": time.Now().Unix(),
- },
- }
- } else if num == 4 { //批量作废 -->关闭
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 6,
- "l_complete": time.Now().Unix(),
- "l_updatetime": time.Now().Unix(),
- },
- }
- } else if num == 7 { //反馈信息 -->待审核
- newData := map[string]interface{}{
- "l_mrecord_comeintime": startTime,
- "l_mrecord_complete": time.Now().Unix(),
- "s_mrecord_remark": reason,
- }
- mrecord := []interface{}{}
- mrecord = append(mrecord, newData)
- update = map[string]interface{}{
- "$set": map[string]interface{}{
- "i_state": 3,
- "l_complete": time.Now().Unix(),
- "a_mrecord": mrecord,
- "l_updatetime": time.Now().Unix(),
- },
- }
- }
- flag := u.MgoEB.Update("task", query, update, false, true)
- log.Println("codeOrId:", query, " 修改任务状态:", flag)
- }
- }
- //更新节点
- func (f *Front) ChangeEvent() {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != role_admin {
- f.ServeJson("没有权限")
- }
- code := f.GetString("code")
- event, _ := f.GetInt("event")
- eventok := false
- for k, _ := range util.Config.Uploadevents {
- if event == qu.Int64All(k) {
- eventok = true
- break
- }
- }
- if !eventok {
- f.ServeJson("没有对应节点")
- return
- }
- //info, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
- info, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
- if len(*info) > 0 {
- oldevent := qu.IntAll((*info)["event"])
- if qu.IntAll((*info)["state"]) == Sp_state_5 {
- //源节点下架
- _, err := spider.UpdateSpiderByCodeState(code, fmt.Sprint(Sp_state_6), oldevent)
- set := map[string]interface{}{
- "$set": map[string]interface{}{
- "event": qu.IntAll(event),
- "state": Sp_state_6,
- },
- }
- //u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
- u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
- if err != nil && strings.Contains(err.Error(), "timeout") {
- f.ServeJson("连接节点" + fmt.Sprint(oldevent) + "超时")
- } else {
- f.ServeJson(err.Error())
- }
- } else {
- set := map[string]interface{}{
- "$set": map[string]interface{}{
- "event": qu.IntAll(event),
- },
- }
- //u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
- u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
- }
- } else {
- f.ServeJson("没有对应记录")
- }
- }
- //验证用户是否有更改状态权限
- func IsHasUpState(auth, state int) bool {
- rep := false
- switch auth {
- case role_dev:
- if state == Sp_state_1 || state == Sp_state_7 {
- rep = true
- }
- case role_examine:
- if state == Sp_state_2 || state == Sp_state_3 {
- rep = true
- }
- case role_admin:
- rep = true
- default:
- }
- return rep
- }
- var list_fields = `{"_id":1,"code":1,"createuser":1,"modifyuser":1,"modifytime":1,"l_uploadtime":1,"l_checktime":1,"state":1,"param_common":1,"event":1,"urgency":1,"platform":1,"pendstate":1}`
- //脚本管理,结合爬虫运行信息
- func (f *Front) LuaList() {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != role_admin {
- f.ServeJson("没有权限!")
- return
- }
- if f.Method() == "POST" {
- state, _ := f.GetInteger("state")
- event, _ := f.GetInteger("event")
- start, _ := f.GetInteger("start")
- limit, _ := f.GetInteger("length")
- draw, _ := f.GetInteger("draw")
- searchStr := f.GetString("search[value]")
- //search := strings.Replace(searchStr, " ", "", -1)
- search := strings.TrimSpace(searchStr)
- platform := f.GetString("platform")
- query := map[string]interface{}{}
- queryArr := []interface{}{}
- //搜索条件
- if search != "" {
- q1 := map[string]interface{}{}
- q1["$or"] = []interface{}{
- map[string]interface{}{"code": map[string]interface{}{"$regex": search}},
- map[string]interface{}{"createuser": map[string]interface{}{"$regex": search}},
- map[string]interface{}{"param_common.1": map[string]interface{}{"$regex": search}},
- }
- queryArr = append(queryArr, q1)
- }
- //爬虫状态
- q2 := map[string]interface{}{}
- if state > -1 {
- q2 = map[string]interface{}{"state": state}
- } else {
- q2 = map[string]interface{}{
- "state": map[string]interface{}{
- "$in": []int{Sp_state_3, Sp_state_5, Sp_state_6},
- },
- }
- }
- queryArr = append(queryArr, q2)
- //爬虫节点
- q3 := map[string]interface{}{}
- if event > -1 {
- q3 = map[string]interface{}{"event": event}
- queryArr = append(queryArr, q3)
- }
- //爬虫平台
- q4 := map[string]interface{}{}
- if platform != "-1" {
- q4 = map[string]interface{}{"platform": platform}
- queryArr = append(queryArr, q4)
- }
- query["$and"] = queryArr
- sort := `{"%s":%d}`
- orderIndex := f.GetString("order[0][column]")
- orderName := f.GetString(fmt.Sprintf("columns[%s][data]", orderIndex))
- orderType := 1
- if f.GetString("order[0][dir]") != "asc" {
- orderType = -1
- }
- sort = fmt.Sprintf(sort, orderName, orderType)
- page := start / 10
- //luas, _ := u.MgoE.Find("luaconfig", query, sort, list_fields, false, start, limit)
- //count := u.MgoE.Count("luaconfig", query)
- luas, _ := u.MgoEB.Find("luaconfig", query, sort, list_fields, false, start, limit)
- count := u.MgoEB.Count("luaconfig", query)
- qu.Debug("query:", query, start, limit, count, len(*luas))
- for k, v := range *luas {
- v["num"] = k + 1 + page*10
- l_uploadtime := qu.Int64All(v["l_uploadtime"])
- v["l_uploadtime"] = qu.FormatDateByInt64(&l_uploadtime, qu.Date_Full_Layout)
- l_checktime := qu.Int64All(v["l_checktime"])
- v["l_checktime"] = qu.FormatDateByInt64(&l_checktime, qu.Date_Full_Layout)
- if l_checktime > 0 { //核对
- v["is_check"] = true
- } else { //未核对
- v["is_check"] = false
- }
- if tmp, ok := spinfos.Load(v["code"]); ok {
- info := tmp.(*spinfo)
- v["modifytime"] = info.lastHeartbeat
- v["yesterday"] = fmt.Sprint(info.yesterdayDowncount) + "/" + fmt.Sprint(info.yestoDayRequestNum)
- v["terday"] = fmt.Sprint(info.todayDowncount) + "/" + fmt.Sprint(info.toDayRequestNum)
- v["lastdowncount"] = info.lastDowncount
- v["lstate"] = info.lstate
- } else {
- v["modifytime"] = ""
- v["yesterday"] = ""
- v["terday"] = ""
- v["lastdowncount"] = 0
- v["lstate"] = ""
- }
- }
- f.ServeJson(map[string]interface{}{"draw": draw, "data": luas, "recordsFiltered": count, "recordsTotal": count})
- } else {
- events := []string{}
- for k, _ := range util.Config.Uploadevents {
- events = append(events, k)
- }
- sort.Strings(events)
- f.T["events"] = events
- f.Render("lualist.html", &f.T)
- }
- }
- //心跳监控
- func (f *Front) Heart() {
- if f.Method() == "POST" {
- event, _ := f.GetInteger("event")
- start, _ := f.GetInteger("start")
- limit, _ := f.GetInteger("length")
- draw, _ := f.GetInteger("draw")
- searchStr := f.GetString("search[value]")
- search := strings.TrimSpace(searchStr) //只能搜索code
- //qu.Debug("search:", search, "start:", start, "limit:", limit, "draw:", draw)
- query := map[string]interface{}{
- "del": false,
- }
- if event > -1 {
- query["event"] = event
- }
- if search != "" {
- query["code"] = search
- }
- sort := `{"%s":%d}`
- orderIndex := f.GetString("order[0][column]")
- orderName := f.GetString(fmt.Sprintf("columns[%s][data]", orderIndex))
- orderType := 1
- if f.GetString("order[0][dir]") != "asc" {
- orderType = -1
- }
- sort = fmt.Sprintf(sort, orderName, orderType)
- qu.Debug("query:", query, "sort:", sort)
- list, _ := u.MgoS.Find("spider_heart", query, sort, nil, false, start, limit)
- count := u.MgoS.Count("spider_heart", query)
- for _, l := range *list {
- code := qu.ObjToString(l["code"])
- qu.Debug(code)
- //d, _ := u.MgoE.FindOneByField("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"state": 1, "param_common": 1, "str_list": 1, "type_list": 1})
- d, _ := u.MgoEB.FindOneByField("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"state": 1, "param_common": 1, "str_list": 1, "type_list": 1, "pendtime": 1})
- l["state"] = (*d)["state"]
- l["param_common"] = (*d)["param_common"]
- if lt := qu.Int64All(l["list"]); lt != 0 {
- l["list"] = qu.FormatDateByInt64(<, qu.Date_Full_Layout)
- } else {
- l["list"] = 0
- }
- if dt := qu.Int64All(l["detail"]); dt != 0 {
- l["detail"] = qu.FormatDateByInt64(&dt, qu.Date_Full_Layout)
- } else {
- l["detail"] = 0
- }
- if det := qu.Int64All(l["detailexecute"]); det != 0 {
- l["detailexecute"] = qu.FormatDateByInt64(&det, qu.Date_Full_Layout)
- } else {
- l["detailexecute"] = 0
- }
- if ft := qu.Int64All(l["findlist"]); ft != 0 {
- l["findlist"] = qu.FormatDateByInt64(&ft, qu.Date_Full_Layout)
- } else {
- l["findlist"] = 0
- }
- ut := qu.Int64All(l["updatetime"])
- l["updatetime"] = qu.FormatDateByInt64(&ut, qu.Date_Full_Layout)
- pendtime := qu.Int64All((*d)["pendtime"])
- if pendtime != 0 {
- l["pendtime"] = qu.FormatDateByInt64(&pendtime, qu.Date_Full_Layout)
- } else {
- l["pendtime"] = "0"
- }
- //l["isfindlist"] = "否"
- //typeList := qu.IntAll((*d)["type_list"])
- //strList := qu.ObjToString((*d)["str_list"])
- //if typeList == 1 && strings.Contains(strList, "findListHtml") {
- // l["isfindlist"] = "是"
- //}
- }
- f.ServeJson(map[string]interface{}{"draw": draw, "data": list, "recordsFiltered": count, "recordsTotal": count})
- } else {
- events := []string{}
- for k, _ := range util.Config.Uploadevents {
- events = append(events, k)
- }
- sort.Strings(events)
- f.T["events"] = events
- f.Render("heart.html", &f.T)
- }
- }
// spinfo is the run-time snapshot of one spider. SpiderInfo fills each field
// from the JSON key of the same name and stores the value in spinfos under
// its code.
type spinfo struct {
	code string // spider code; also the key in spinfos

	// Download / request counters, shown as "down/request" pairs.
	todayDowncount     int
	toDayRequestNum    int
	yesterdayDowncount int
	yestoDayRequestNum int
	totalDowncount     int
	totalRequestNum    int

	errorNum   int
	roundCount int
	runRate    int

	lastDowncount int
	lastHeartbeat string // already formatted as a date string
	lstate        string
}
- //爬虫信息
- func SpiderInfo(data string) {
- data = util.Se.DecodeString(data)
- infos := []map[string]interface{}{}
- err := json.Unmarshal([]byte(data), &infos)
- if err != nil {
- return
- }
- for _, tmp := range infos {
- lastHeartbeat := qu.Int64All(tmp["lastHeartbeat"])
- info := &spinfo{
- code: fmt.Sprint(tmp["code"]),
- todayDowncount: qu.IntAll(tmp["todayDowncount"]),
- toDayRequestNum: qu.IntAll(tmp["toDayRequestNum"]),
- yesterdayDowncount: qu.IntAll(tmp["yesterdayDowncount"]),
- yestoDayRequestNum: qu.IntAll(tmp["yestoDayRequestNum"]),
- totalDowncount: qu.IntAll(tmp["totalDowncount"]),
- totalRequestNum: qu.IntAll(tmp["totalRequestNum"]),
- errorNum: qu.IntAll(tmp["errorNum"]),
- roundCount: qu.IntAll(tmp["roundCount"]),
- runRate: qu.IntAll(tmp["runRate"]),
- lastHeartbeat: qu.FormatDateByInt64(&lastHeartbeat, qu.Date_Full_Layout),
- lastDowncount: qu.IntAll(tmp["lastDowncount"]),
- lstate: fmt.Sprint(tmp["lstate"]),
- }
- spinfos.Store(info.code, info)
- //log.Println(info)
- }
- }
- //接受维护任务信息
- func SpiderModifyTask(data string) {
- data = util.Se.DecodeString(data)
- mtasks := []map[string]interface{}{}
- err := json.Unmarshal([]byte(data), &mtasks)
- if err != nil {
- return
- }
- for k, tmp := range mtasks {
- log.Println(k, tmp)
- }
- }
- //查看是否有该任务
- func checkTask(codes []string, num int) []string {
- // var id string = ""
- query := map[string]interface{}{}
- var idArr []string
- if len(codes) > 0 {
- for _, v := range codes {
- if num == 1 {
- query = map[string]interface{}{
- "s_code": v,
- "i_state": map[string]interface{}{
- "$in": []int{1, 2, 5},
- },
- }
- } else if num == 2 { //打回时查询待审核的任务
- query = map[string]interface{}{
- "s_code": v,
- "i_state": 3,
- }
- } else if num == 3 { //审核通过时查询待处理、处理中、待审核、未通过的任务
- query = map[string]interface{}{
- "s_code": v,
- "i_state": map[string]interface{}{
- "$in": []int{1, 2, 3, 5},
- },
- }
- } else if num == 7 {
- query = map[string]interface{}{
- "s_code": v,
- "i_state": map[string]interface{}{
- "$in": []int{2, 5},
- },
- }
- }
- task, _ := u.MgoEB.Find("task", query, nil, nil, false, -1, -1)
- if task != nil {
- for _, t := range *task {
- idArr = append(idArr, mongodb.BsonIdToSId(t["_id"]))
- }
- }
- return idArr
- }
- }
- return idArr
- }
|