12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332 |
- package front
- import (
- "encoding/json"
- "errors"
- "fmt"
- "log"
- "luaweb/spider"
- "sort"
- // "math/rand"
- u "luaweb/util"
- mu "mfw/util"
- qu "qfw/util"
- mgdb "qfw/util/mongodb"
- mgu "qfw/util/mongodbutil"
- util "spiderutil"
- "strings"
- "time"
- "gopkg.in/mgo.v2/bson"
- )
// Base holds the general spider parameters that SaveStep and RunStep read
// from f.Base. Except for SpiderCodeOld, the fields are copied in declaration
// order into the "param_common" array.
type Base struct {
	// SpiderCode is param_common[0]; SaveStep also stores it under "code".
	SpiderCode string
	// SpiderCodeOld is not referenced by the handlers visible in this file.
	SpiderCodeOld string
	// SpiderName is param_common[1].
	SpiderName string
	// SpiderChannel is param_common[2].
	SpiderChannel string
	// SpiderDownDetailPage is param_common[3].
	SpiderDownDetailPage bool
	// SpiderStartPage is param_common[4]; SaveStep overwrites that slot with 1.
	SpiderStartPage int
	// SpiderMaxPage is param_common[5].
	SpiderMaxPage int
	// SpiderRunRate is param_common[6].
	SpiderRunRate int
	// Spider2Collection is param_common[7].
	Spider2Collection string
	// SpiderPageEncoding is param_common[8].
	SpiderPageEncoding string
	// SpiderStoreMode is param_common[9]; the original note lists values 1 and 2.
	SpiderStoreMode int
	// SpiderStoreToMsgEvent is param_common[10].
	SpiderStoreToMsgEvent int
	// SpiderTargetChannelUrl is param_common[11].
	SpiderTargetChannelUrl string
	// SpiderLastDownloadTime is param_common[12].
	SpiderLastDownloadTime string
	// SpiderIsHistoricalMend is param_common[13].
	SpiderIsHistoricalMend bool
	// SpiderIsMustDownload is param_common[14].
	SpiderIsMustDownload bool
}
// Step1 holds the "publish time" step parameters read from f.Step1.
// SaveStep stores DateFormat, Address and ContentChooser (in that order) as
// "param_time", Expert as "str_time" and Types as "type_time".
type Step1 struct {
	// Address is param_time[1].
	Address string
	// ContentChooser is param_time[2].
	ContentChooser string
	// DateFormat is param_time[0].
	DateFormat string
	// Expert is the expert-mode text saved as "str_time".
	Expert string
	// Types is saved as "type_time".
	Types int
}
// Step2 holds the list-page step parameters read from f.Step2.
// SaveStep stores the first seven fields, in declaration order, as
// "param_list", Expert as "str_list" and Types as "type_list".
type Step2 struct {
	// Listadd is param_list[0].
	Listadd string
	// Listadds is param_list[1]; RunStep splits it on newlines and quotes each entry.
	Listadds string
	// BlockChooser is param_list[2].
	BlockChooser string
	// AddressChooser is param_list[3].
	AddressChooser string
	// TitleChooser is param_list[4].
	TitleChooser string
	// DateChooser is param_list[5].
	DateChooser string
	// DateFormat is param_list[6].
	DateFormat string
	// Expert is the expert-mode text saved as "str_list".
	Expert string
	// Types is saved as "type_list".
	Types int
}
// Step3 holds the detail-page step parameters read from f.Step3.
// SaveStep stores ContentChooser and ElementChooser as "param_content",
// Expert as "str_content" and Types as "type_content".
type Step3 struct {
	// ContentChooser is param_content[0].
	ContentChooser string
	// ElementChooser is param_content[1].
	ElementChooser string
	// T_title is saved as "Ttitle" and passed as data["title"] by RunStep.
	T_title string
	// T_href is saved as "Thref" and passed as data["href"] by RunStep.
	T_href string
	// T_date is saved as "Tdate" and passed as data["publishtime"] by RunStep.
	T_date string
	// Expert is the expert-mode text saved as "str_content".
	Expert string
	// Types is saved as "type_content".
	Types int
}
- //加载某个爬虫
- func (f *Front) LoadSpider(codeTaskIdReState string) error {
- tmpStr := strings.Split(codeTaskIdReState, "__")
- code := tmpStr[0]
- taskId := tmpStr[1]
- auth := qu.IntAll(f.GetSession("auth"))
- restate := -1
- if taskId == "restate=1" { //重采编辑
- restate = 1
- } else if taskId == "restate=2" {
- restate = 2
- } else if taskId == "restate=3" {
- restate = 3
- } else {
- if auth == role_dev && (f.GetSession(taskId) == nil || f.GetSession(taskId) == "") {
- xgTime := time.Unix(time.Now().Unix(), 0).Format("2006-01-02 15:04:05")
- f.SetSession(taskId, xgTime)
- }
- }
- copy := f.GetString("copy")
- if f.Method() == "GET" {
- code := util.Se.Decode4Hex(code)
- f.T["actiontext"] = "编辑"
- lua := *mgdb.FindOne("luaconfig", bson.M{"code": code})
- auth := qu.IntAll(f.GetSession("auth"))
- if lua["createuserid"].(string) == f.GetSession("userid").(string) || auth >= 1 {
- if len(lua) > 0 {
- luacopy := map[string]interface{}{}
- if copy != "" {
- luacopy = *mgdb.FindOne("luaconfig", bson.M{"code": copy})
- if len(luacopy) > 0 {
- lua["model"] = luacopy["model"]
- common_copy := luacopy["param_common"].([]interface{})
- common := lua["param_common"].([]interface{})
- common_copy[0] = common[0]
- common_copy[1] = common[1]
- common_copy[2] = common[2]
- common_copy[11] = common[11]
- lua["param_common"] = luacopy["param_common"]
- lua["param_time"] = luacopy["param_time"]
- lua["param_list"] = luacopy["param_list"]
- lua["param_content"] = luacopy["param_content"]
- lua["str_list"] = luacopy["str_list"]
- lua["str_time"] = luacopy["str_time"]
- lua["str_content"] = luacopy["str_content"]
- lua["Thref"] = luacopy["Thref"]
- lua["Tpublishtime"] = luacopy["Tpublishtime"]
- lua["Ttitle"] = luacopy["Ttitle"]
- lua["Tdate"] = luacopy["Tdate"]
- lua["type_content"] = luacopy["type_content"]
- lua["type_list"] = luacopy["type_list"]
- lua["type_time"] = luacopy["type_time"]
- }
- }
- if lua["listcheck"] != nil {
- listcheck := lua["listcheck"].(string)
- listcheck = strings.Replace(listcheck, "\\n", "\n", -1)
- listcheck = strings.Replace(listcheck, "\\", "", -1)
- lua["listcheck"] = listcheck
- }
- if lua["contentcheck"] != nil {
- contentcheck := lua["contentcheck"].(string)
- contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1)
- contentcheck = strings.Replace(contentcheck, "\\", "", -1)
- lua["contentcheck"] = contentcheck
- }
- js, _ := json.MarshalIndent(lua["model"], "", " ")
- lua["js"] = string(js)
- f.T["lua"] = lua
- f.T["taskId"] = taskId
- f.T["restate"] = restate
- f.T["isflow"] = lua["isflow"]
- f.T["spidertype"] = lua["spidertype"]
- f.T["spidermovevent"] = lua["spidermovevent"]
- f.T["spiderhistorymaxpage"] = lua["spiderhistorymaxpage"]
- if lua["oldlua"] != nil {
- return f.Render("oldedit.html", &f.T)
- }
- return f.Render("spideredit.html", &f.T)
- }
- } else {
- f.Write("您没有编辑他人脚本的权限")
- }
- }
- return nil
- }
- //查看某个爬虫
- func (f *Front) ViewSpider(id string) error {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth >= 1 {
- if f.Method() == "GET" {
- code := util.Se.Decode4Hex(id)
- f.T["actiontext"] = "编辑"
- lua := *mgdb.FindOne("luaconfig", bson.M{"code": code})
- if len(lua) > 0 {
- if lua["listcheck"] != nil {
- listcheck := lua["listcheck"].(string)
- listcheck = strings.Replace(listcheck, "\\n", "\n", -1)
- listcheck = strings.Replace(listcheck, "\\", "", -1)
- lua["listcheck"] = listcheck
- }
- if lua["contentcheck"] != nil {
- contentcheck := lua["contentcheck"].(string)
- contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1)
- contentcheck = strings.Replace(contentcheck, "\\", "", -1)
- lua["contentcheck"] = contentcheck
- }
- js, _ := json.MarshalIndent(lua["model"], "", " ")
- lua["js"] = string(js)
- f.T["lua"] = lua
- f.T["isflow"] = lua["isflow"]
- f.T["spidertype"] = lua["spidertype"]
- f.T["spidermovevent"] = lua["spidermovevent"]
- f.T["spiderhistorymaxpage"] = lua["spiderhistorymaxpage"]
- if lua["oldlua"] != nil {
- return f.Render("oldedit.html", &f.T)
- }
- return f.Render("spiderview.html", &f.T)
- } else {
- f.Write("没有对应记录!")
- return nil
- }
- }
- return f.Redirect("/center")
- } else {
- f.Write("您没有查看他人脚本的权限")
- return nil
- }
- }
- func (f *Front) LoadModel(id string) error {
- if f.Method() == "GET" {
- lua := *mgdb.Find("luaconfig", bson.M{"code": id}, nil, bson.M{"model": 1}, true, -1, -1)
- if len(lua) > 0 {
- f.ServeJson(lua[0])
- }
- }
- return f.Redirect("/center")
- }
- func (f *Front) SaveStep() {
- userid, _ := f.GetSession("userid").(string)
- auth := qu.IntAll(f.GetSession("auth"))
- rep := map[string]interface{}{}
- if f.GetString("oldlua") != "" {
- id := f.GetString("code")
- one := *mgdb.FindOne("luaconfig", bson.M{"code": id})
- id = one["code"].(string)
- script := f.GetStringComm("script")
- if strings.Index(script, id) == -1 {
- rep["msg"] = "code/名称都不能更改"
- f.ServeJson(rep)
- return
- } else {
- upset := bson.M{"luacontent": script}
- upset["modifytime"] = time.Now().Unix()
- b := mgdb.Update("luaconfig", bson.M{"code": id}, bson.M{"$set": upset}, true, false)
- if b {
- rep["msg"] = "保存成功"
- rep["code"] = util.Se.Encode2Hex(id)
- f.ServeJson(rep)
- return
- }
- }
- } else {
- if f.Base.SpiderName != "" && f.Base.SpiderCode != "" {
- code := f.Base.SpiderCode
- one := *mgdb.FindOne("luaconfig", bson.M{"code": f.Base.SpiderCode})
- state := qu.IntAllDef(one["state"], 0)
- restate := qu.IntAll(one["restate"])
- comeintime := time.Now().Unix()
- if len(one) > 0 {
- comeintime = qu.Int64All(one["comeintime"])
- ouserid := one["createuserid"].(string)
- if ouserid != userid && auth == role_dev {
- f.Write("权限不够,不能修改他人脚本")
- return
- } else {
- code = one["code"].(string)
- f.Base.SpiderCode = one["code"].(string)
- f.Base.SpiderName = (one["param_common"].([]interface{}))[1].(string)
- }
- } else {
- if auth != role_admin {
- f.Write("不能新建爬虫,请联系管理员导入")
- return
- }
- }
- listcheck := f.GetString("listcheck")
- contentcheck := f.GetString("contentcheck")
- if auth == role_dev {
- //f.Base.SpiderStoreToMsgEvent = 4002
- }
- common := []interface{}{
- f.Base.SpiderCode,
- f.Base.SpiderName,
- f.Base.SpiderChannel,
- f.Base.SpiderDownDetailPage,
- f.Base.SpiderStartPage,
- f.Base.SpiderMaxPage,
- f.Base.SpiderRunRate,
- f.Base.Spider2Collection,
- f.Base.SpiderPageEncoding,
- f.Base.SpiderStoreMode,
- f.Base.SpiderStoreToMsgEvent,
- f.Base.SpiderTargetChannelUrl,
- f.Base.SpiderLastDownloadTime,
- f.Base.SpiderIsHistoricalMend,
- f.Base.SpiderIsMustDownload,
- }
- ptime := []interface{}{
- f.Step1.DateFormat,
- f.Step1.Address,
- f.Step1.ContentChooser,
- }
- list := []interface{}{
- f.Step2.Listadd,
- f.Step2.Listadds,
- f.Step2.BlockChooser,
- f.Step2.AddressChooser,
- f.Step2.TitleChooser,
- f.Step2.DateChooser,
- f.Step2.DateFormat,
- }
- content := []interface{}{
- f.Step3.ContentChooser,
- f.Step3.ElementChooser,
- }
- param := map[string]interface{}{}
- common[4] = 1
- param["param_common"] = common
- //向导模式
- param["param_time"] = ptime
- param["param_list"] = list
- param["param_content"] = content
- param["type_time"] = f.Step1.Types
- param["type_list"] = f.Step2.Types
- param["type_content"] = f.Step3.Types
- //专家模式
- param["str_time"] = f.Step1.Expert
- param["str_list"] = f.Step2.Expert
- param["str_content"] = f.Step3.Expert
- param["comeintime"] = comeintime
- listcheck = strings.Replace(listcheck, "\n", "\\\\n", -1)
- param["listcheck"] = strings.Replace(listcheck, "\"", "\\\\\"", -1)
- contentcheck = strings.Replace(contentcheck, "\n", "\\\\n", -1)
- param["contentcheck"] = strings.Replace(contentcheck, "\"", "\\\\\"", -1)
- //补充模型
- s_model := f.GetString("model")
- configModel := util.Config.Model[s_model]
- model := map[string]interface{}{}
- for k, _ := range configModel {
- model[k] = f.GetString(k)
- }
- model["model"] = s_model
- param["code"] = f.Base.SpiderCode
- param["model"] = model
- if len(one) > 0 {
- param["createuser"] = one["createuser"]
- param["createuserid"] = one["createuserid"]
- param["code"] = one["code"]
- //开发员关联任务修改爬虫状态
- state = qu.IntAll(one["state"])
- if auth == role_dev && state >= Sp_state_3 && restate != 1 { //开发员修改,已经审核通过(不包含已上架),状态重置为待完成(restate!=1判断,重采修改保存爬虫时不修改爬虫状态)
- param["state"] = 0
- } else {
- param["state"] = state
- }
- } else {
- param["createuser"] = f.GetSession("loginuser")
- param["createuserid"] = f.GetSession("userid")
- param["createuseremail"] = f.GetSession("email")
- param["next"] = f.GetSession("email")
- param["state"] = 0
- }
- if qu.ObjToString(one["modifyuser"]) == "" {
- param["modifyuser"] = param["createuser"]
- param["modifyuserid"] = param["createuserid"]
- }
- param["modifytime"] = time.Now().Unix()
- param["Ttitle"] = f.Step3.T_title
- param["Thref"] = f.Step3.T_href
- param["Tdate"] = f.Step3.T_date
- //其他信息
- param["isflow"] = f.OtherBase.IsFlow
- param["spidertype"] = f.OtherBase.SpiderType
- param["spidermovevent"] = f.OtherBase.SpiderMoveEvent
- param["spiderhistorymaxpage"] = f.OtherBase.SpiderHistoryMaxPage
- if f.OtherBase.SpiderType == "history" { //爬虫类型是history的放到7000节点,并记录历史节点
- param["event"] = 7000
- if event := qu.IntAll(one["event"]); event != 7000 {
- param["historyevent"] = event
- }
- }
- if f.OtherBase.SpiderMoveEvent == "7700" {
- param["historyevent"] = 7700
- }
- issave := spider.SaveSpider(code, param)
- if issave {
- for k, v := range one {
- if k != "_id" && param[k] == nil {
- param[k] = v
- }
- }
- Wlog(f.Base.SpiderName, f.Base.SpiderCode, f.GetSession("username").(string), f.GetSession("userid").(string), "修改", param)
- rep["msg"] = "保存成功"
- } else {
- rep["msg"] = "保存失败"
- }
- rep["code"] = util.Se.Encode2Hex(code)
- f.ServeJson(rep)
- }
- }
- }
- //方法测试
- func (f *Front) RunStep() {
- imodal, _ := f.GetInteger("imodal")
- script, _ := f.GetBool("script")
- listcheck := f.GetString("listcheck")
- contentcheck := f.GetString("contentcheck")
- downloadnode := f.GetString("downloadnode") //下载节点
- common := []interface{}{
- f.Base.SpiderCode,
- f.Base.SpiderName,
- f.Base.SpiderChannel,
- f.Base.SpiderDownDetailPage,
- f.Base.SpiderStartPage,
- f.Base.SpiderMaxPage,
- f.Base.SpiderRunRate,
- f.Base.Spider2Collection,
- f.Base.SpiderPageEncoding,
- f.Base.SpiderStoreMode,
- f.Base.SpiderStoreToMsgEvent,
- f.Base.SpiderTargetChannelUrl,
- f.Base.SpiderLastDownloadTime,
- f.Base.SpiderIsHistoricalMend,
- f.Base.SpiderIsMustDownload,
- "",
- "",
- "",
- }
- if f.Method() == "POST" {
- switch f.GetString("step") {
- case "step1":
- ptime := []interface{}{
- f.Step1.DateFormat,
- f.Step1.Address,
- f.Step1.ContentChooser,
- }
- if script {
- _, scripts := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal, 1)
- f.ServeJson(scripts)
- return
- }
- rs, err := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal)
- if err == nil {
- f.ServeJson(rs)
- }
- case "step2":
- addrs := strings.Split(f.Step2.Listadds, "\n")
- if len(addrs) > 0 {
- for k, v := range addrs {
- addrs[k] = "'" + v + "'"
- }
- f.Step2.Listadds = strings.Join(addrs, ",")
- } else if len(f.Step2.Listadds) > 5 {
- f.Step2.Listadds = "'" + f.Step2.Listadds + "'"
- } else {
- f.Step2.Listadds = ""
- }
- list := []interface{}{
- f.Step2.Listadd,
- f.Step2.Listadds,
- f.Step2.BlockChooser,
- f.Step2.AddressChooser,
- f.Step2.TitleChooser,
- f.Step2.DateChooser,
- f.Step2.DateFormat,
- }
- listcheck = strings.Replace(listcheck, "\n", "\\n", -1)
- listcheck = strings.Replace(listcheck, "\"", "\\\"", -1)
- s_model := f.GetString("model")
- configModel := util.Config.Model[s_model]
- model := map[string]interface{}{}
- for k, _ := range configModel {
- model[k] = f.GetString(k)
- }
- if script {
- _, script := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal, 1)
- f.ServeJson(script)
- return
- }
- rs, err := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal)
- if err == nil {
- f.ServeJson(rs)
- } else if err.(error).Error() == "no" {
- f.ServeJson(rs[0])
- }
- case "step3":
- content := []interface{}{
- f.Step3.ContentChooser,
- f.Step3.ElementChooser,
- }
- contentcheck = strings.Replace(contentcheck, "\n", "\\n", -1)
- contentcheck = strings.Replace(contentcheck, "\"", "\\\"", -1)
- data := map[string]interface{}{}
- data["title"] = f.Step3.T_title
- data["href"] = f.Step3.T_href
- data["publishtime"] = f.Step3.T_date
- if script {
- _, script := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal, 1)
- f.ServeJson(script)
- return
- }
- rs, err := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal)
- if projectinfo, ok := rs["projectinfo"].(map[string]interface{}); ok && projectinfo != nil {
- if attachments, ok := projectinfo["attachments"].(map[string]interface{}); ok && attachments != nil {
- for _, tmp := range attachments {
- tmpMap := tmp.(map[string]interface{})
- if qu.ObjToString(tmpMap["filename"]) == "附件中含有乱码" {
- rs["msg"] = "附件中含有乱码"
- }
- }
- }
- }
- if err == nil {
- f.ServeJson(rs)
- } else {
- f.ServeJson(rs["no"])
- }
- }
- }
- }
- //爬虫测试数据json
- func (f *Front) GetJson() {
- list, _ := f.GetSession("listInfo").([]map[string]interface{})
- data, _ := f.GetSession("dataInfo").(map[string]interface{})
- descript := f.GetSession("task_descript")
- remark := f.GetSession("task_remark")
- reason := f.GetSession("reason")
- username := f.GetSession("username").(string)
- msg := f.GetSession(username + "_msg")
- if len(data) > 0 {
- data["contenthtml"] = ""
- }
- for k, v := range list {
- v["a_index"] = k + 1
- }
- f.T["list"] = list
- f.T["data"] = data
- f.T["descript"] = descript
- f.T["remark"] = remark
- f.T["reason"] = reason
- f.T["msg"] = msg
- f.DelSession("listInfo")
- f.DelSession("dataInfo")
- f.DelSession("task_descript")
- f.DelSession("task_remark")
- f.DelSession("reason")
- f.Render("jsonInfo.html", &f.T)
- }
- //整体测试
- func (f *Front) SpiderPass() {
- defer mu.Catch()
- list := []map[string]interface{}{}
- data := map[string]interface{}{}
- msg1 := ""
- code := f.GetString("code")
- downloadnode := f.GetString("node")
- //根据code查询待确认任务
- query := bson.M{
- "s_code": code,
- "i_state": 3,
- }
- task := *mgdb.FindOne("task", query)
- descript := "null"
- remark := "null"
- remarktmp := []string{}
- if len(task) > 0 {
- descript = task["s_descript"].(string)
- if mrecord, ok := task["a_mrecord"].([]interface{}); ok {
- for _, m := range mrecord {
- remarkInfo := m.(map[string]interface{})
- r := remarkInfo["s_mrecord_remark"].(string)
- if r != "" {
- remarktmp = append(remarktmp, r+";")
- }
- }
- }
- }
- if len(remarktmp) > 0 {
- remark = ""
- remark = strings.Join(remarktmp, "")
- }
- f.SetSession("task_remark", remark)
- f.SetSession("task_descript", descript)
- //基本信息、方法一、方法二、方法三、总请求次数、go方法一、go方法二、go方法三、列表页条数
- steps := []interface{}{false, false, false, false, 0, 0, 0, 0, 0}
- one := *mgdb.FindOne("luaconfig", bson.M{"code": code})
- reason, _ := one["reason"].(string)
- f.SetSession("reason", reason)
- if len(one) > 0 && one["oldlua"] == nil {
- common := one["param_common"].([]interface{})
- if len(common) < 13 {
- f.ServeJson(steps)
- return
- } else {
- steps[0] = true
- }
- } else {
- steps[0] = true
- }
- script, liststr, contentstr := "", "", ""
- if one["oldlua"] == nil {
- script, liststr, contentstr = spider.GetScript(code)
- } else {
- script = one["luacontent"].(string)
- }
- if liststr != "" && contentstr != "" {
- msg1 = u.SpiderPassCheckLua(liststr, contentstr, one)
- }
- s := spider.CreateSpider(downloadnode, script)
- s.SpiderMaxPage = 1
- s.Timeout = 60
- time, timeerr := s.GetLastPublishTime()
- if timeerr == nil && len(time) > 4 {
- steps[1] = true
- list, _ = s.DownListPageItem()
- if len(list) > 0 {
- f.SetSession("listInfo", list)
- listone := list[0]
- if len(qu.ObjToString(listone["href"])) < 7 ||
- (qu.ObjToString(listone["publishtime"]) != "0" && len(qu.ObjToString(listone["publishtime"])) < 5) ||
- len(qu.ObjToString(listone["title"])) < 3 {
- f.ServeJson(steps)
- return
- } else {
- steps[2] = true
- if s.DownDetail {
- param := map[string]string{}
- index := 0
- if len(list) > 0 {
- steps[8] = len(list)
- index = len(list) / 2
- for k, v := range list[index] {
- param[k] = qu.ObjToString(v)
- }
- data = map[string]interface{}{}
- s.DownloadDetailPage(param, data)
- if len(data) > 0 {
- f.SetSession("dataInfo", data)
- } else {
- f.SetSession("dataInfo", "")
- }
- if len(data) == 0 || data["detail"].(string) == "" {
- steps[3] = false
- } else {
- steps[3] = true
- }
- }
- } else {
- steps[3] = true
- }
- }
- } else {
- f.SetSession("listInfo", "")
- f.SetSession("dataInfo", "")
- }
- }
- //关闭laustate
- s.L.Close()
- steps[4] = s.Test_luareqcount
- steps[5] = s.Test_goreqtime
- steps[6] = s.Test_goreqlist
- steps[7] = s.Test_goreqcon
- //校验
- msg := u.SpiderPassCheckListAndDetail(list, data)
- if msg1 != "" {
- msg = msg1 + "," + msg
- }
- username := f.GetSession("username").(string)
- f.SetSession(username+"_msg", msg)
- f.ServeJson(steps)
- }
- func (f *Front) DownSpider(id string) {
- //auth := qu.IntAll(f.GetSession("auth"))
- //if auth > role_dev {
- one := *mgdb.FindOne("luaconfig", bson.M{"code": id})
- script := ""
- filename := id + ".lua"
- if len(one) > 0 {
- if one["oldlua"] != nil {
- if one["luacontent"] != nil {
- script = one["luacontent"].(string)
- }
- } else {
- user := *mgdb.FindOne("user", bson.M{"_id": bson.ObjectIdHex(one["createuserid"].(string))})
- name := one["createuser"]
- email := user["s_email"]
- upload := time.Now().Format("2006-01-02 15:04:05")
- script, _, _ = spider.GetScript(id, name, email, upload)
- }
- }
- f.ResponseWriter.Header().Del("Content-Type")
- f.ResponseWriter.Header().Add("Content-Type", "application/x-download")
- f.ResponseWriter.Header().Add("Content-Disposition", "attachment;filename=spider_"+filename)
- f.WriteBytes([]byte(script))
- // } else {
- // f.Write("您没有权限")
- // }
- }
- //更新爬虫状态
- func (f *Front) UpState() error {
- username := f.GetSession("username").(string)
- code := f.GetString("code")
- state, _ := f.GetInt("state")
- id := f.GetString("taskId")
- reason := f.GetString("reason")
- auth := qu.IntAll(f.GetSession("auth"))
- var codeArr = []string{code}
- var taskid []string
- //修改任务状态
- istotask := false
- res := map[string]interface{}{
- "istotask": istotask,
- "err": "没有权限",
- "code": util.Se.Encode2Hex(code),
- "taskid": taskid,
- }
- var xgTime int64
- if f.GetSession(id) == nil || f.GetSession(id) == "" {
- xgTime = time.Now().Unix()
- } else {
- xgTimeStr := qu.ObjToString(f.GetSession(id))
- xgTimeTmp, _ := time.ParseInLocation("2006-01-02 15:04:05", xgTimeStr, time.Local)
- xgTime = xgTimeTmp.Unix()
- }
- f.DelSession(id)
- if IsHasUpState(auth, int(state)) {
- b, err := UpStateAndUpSpider(code, "", reason, username, int(state)) //更新爬虫状态
- if b && state == Sp_state_1 { //提交审核
- //有对应任务跳转提交记录页
- taskid = checkTask(codeArr, 1)
- if len(taskid) > 0 {
- res["istotask"] = true
- res["taskid"] = taskid[0]
- }
- } else if b && state == Sp_state_2 { //打回
- taskid = checkTask(codeArr, 2)
- if len(taskid) > 0 {
- //UpTaskState([]string{taskid}, 2) //修改状态
- UpTaskState(taskid, 2, "", int64(0)) //修改任务状态
- SaveRemark(taskid, reason, username) //保存记录信息
- }
- } else if b && state == Sp_state_3 { //审核通过
- taskid = checkTask(codeArr, 3)
- if len(taskid) > 0 {
- //UpTaskState([]string{taskid}, 3)
- UpTaskState(taskid, 3, "", int64(0))
- SaveRemark(taskid, "", username)
- }
- } else if b && state == Sp_state_6 { //下架
- //下架成功删除download数据
- flag := delDownloadData(code)
- log.Println(code, "---下架删除download数据:", flag)
- } else if b && state == Sp_state_7 { //反馈
- taskid = checkTask(codeArr, 7)
- if len(taskid) > 0 {
- UpTaskState(taskid, 7, reason, xgTime)
- }
- }
- if err != nil {
- res["err"] = err.Error()
- f.ServeJson(res)
- } else {
- res["err"] = ""
- f.ServeJson(res)
- }
- } else {
- f.ServeJson(res)
- }
- return nil
- }
- //下架删除download数据
- func delDownloadData(code string) bool {
- return mgu.Del("download", "spider", "spider", `{"code":"`+code+`"}`)
- }
- //批量作废删除download数据
- func disableDelDownloadData(code []string) {
- for _, v := range code {
- flag := delDownloadData(v)
- log.Println(code, "---批量删除download数据:", flag)
- }
- }
- //爬虫核对
- func (f *Front) Checktime() {
- code := f.GetString("code")
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != role_admin {
- f.ServeJson(false)
- } else {
- b := mgdb.Update("luaconfig", `{"code":"`+code+`"}`, `{"$set":{"l_checktime":`+fmt.Sprint(time.Now().Unix())+`}}`, true, false)
- f.ServeJson(b)
- }
- }
- //批量作废
- func (f *Front) Disables() error {
- auth := qu.IntAll(f.GetSession("auth"))
- names := strings.Split(f.GetString("names"), ",")
- ids := strings.Split(f.GetString("ids"), ",")
- codes := strings.Split(f.GetString("codes"), ",")
- disablereason := f.GetString("disablereason")
- res := ""
- if IsHasUpState(auth, Sp_state_4) {
- for k, id := range ids {
- b, err := UpStateAndUpSpider("", id, disablereason, "", Sp_state_4)
- if b { //作废成功
- //修改任务状态
- UpTaskState(codes, 4, "", int64(0))
- //删除download表数据
- //go disableDelDownloadData(codes)
- if err != nil {
- res = res + names[k] + ",ok" + qu.ObjToString(err.Error()) + ";"
- } else {
- res = res + names[k] + ",ok" + ";"
- }
- } else {
- res = res + names[k] + "," + qu.ObjToString(err.Error()) + ";"
- }
- }
- } else {
- res = "没有权限"
- }
- f.ServeJson(res)
- return nil
- }
- //批量上下架
- func (f *Front) BatchShelves() {
- codes := strings.Split(f.GetString("codes"), ",")
- state, _ := f.GetInteger("state")
- auth := qu.IntAll(f.GetSession("auth"))
- errCode := []string{}
- var err error
- b := false
- if IsHasUpState(auth, Sp_state_5) {
- if state == 5 { //批量上架
- for _, code := range codes {
- _, err = UpStateAndUpSpider(code, "", "", "", Sp_state_5)
- if err != nil {
- errCode = append(errCode, code)
- }
- }
- } else { //批量下架
- for _, code := range codes {
- b, err = UpStateAndUpSpider(code, "", "", "", Sp_state_6)
- if err != nil {
- errCode = append(errCode, code)
- }
- //下架删除download数据
- if b {
- flag := delDownloadData(code)
- log.Println(code, "---删除download数据:", flag)
- }
- }
- }
- } else {
- errCode = append(errCode, "没有权限")
- }
- f.ServeJson(errCode)
- }
- //更新爬虫状态,并判断是否更新节点爬虫
- func UpStateAndUpSpider(code, id, reason, username string, state int) (bool, error) {
- upresult := false
- var err error
- one := map[string]interface{}{}
- if code != "" {
- one = *mgdb.FindOne("luaconfig", bson.M{"code": code})
- } else {
- one = *mgdb.FindOne("luaconfig", bson.M{"_id": bson.ObjectIdHex(id)})
- code = one["code"].(string)
- }
- if len(one) > 0 {
- var event int
- if one["event"] != nil {
- event = qu.IntAll(one["event"])
- } else {
- for k, _ := range util.Config.Uploadevents { //?
- event = qu.IntAll(k)
- break
- }
- //r := rand.New(rand.NewSource(time.Now().UnixNano()))
- //event = util.Config.Uploadevents[r.Intn(len(util.Config.Uploadevents))]
- }
- //oldstate := qu.IntAll(one["state"])
- switch state {
- case Sp_state_4: //作废
- // if oldstate == Sp_state_5 {
- // upresult = false
- // err = errors.New("已上架不允许作废")
- // } else {
- // upresult = true
- // }
- upresult, err = spider.UpdateSpiderByCodeState(code, "6", event) //下架
- case Sp_state_5, Sp_state_6: //上下架
- upresult, err = spider.UpdateSpiderByCodeState(code, fmt.Sprint(state), event)
- //log.Println(upresult, err)
- default:
- upresult = true
- err = nil
- }
- if err != nil && strings.Contains(err.Error(), "timeout") {
- err = errors.New("连接节点" + fmt.Sprint(event) + "超时")
- upresult = true
- }
- if upresult && err == nil {
- upset := bson.M{"state": state} //修改状态
- if one["oldlua"] != nil { //老脚本上传
- upresult = mgdb.Update("luaconfig", bson.M{"code": code}, bson.M{"$set": upset}, true, false)
- } else {
- if state == Sp_state_1 { //提交审核
- upset["l_complete"] = time.Now().Unix()
- upset["report"] = ""
- } else if state == Sp_state_3 { //发布
- if one["event"] == nil {
- upset["event"] = event
- upset["modifytime"] = time.Now().Unix()
- }
- upset["l_uploadtime"] = time.Now().Unix()
- } else if state == Sp_state_2 { //打回原因
- upset["reason"] = reason
- } else if state == Sp_state_7 { //反馈问题
- upset["report"] = reason
- upset["state"] = 1 //反馈后爬虫改为待审核
- } else if state == Sp_state_5 { //上架,核对时间重置
- upset["l_checktime"] = 0
- } else if state == Sp_state_4 { //作废,作废原因
- upset["disablereason"] = reason
- upset["modifytime"] = time.Now().Unix()
- }
- upresult = mgdb.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, false, false)
- if state == Sp_state_1 { //提交审核,验证是否提交成功
- for i := 1; i <= 5; i++ { //解决提交不上,重试5次
- lua := *mgdb.FindOne("luaconfig", map[string]interface{}{"code": code})
- tmpState := qu.IntAll(lua["state"])
- if state == tmpState {
- break
- } else {
- upresult = mgdb.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, false, false)
- upresult = false
- }
- }
- }
- qu.Debug("提交日志:", code, upset, upresult)
- if upresult && (state == Sp_state_2 || state == Sp_state_3) { //打回、审核记录日志
- types := "打回"
- if state == Sp_state_3 {
- types = "审核"
- }
- obj := bson.M{
- "code": code,
- "auditor": username,
- "types": types,
- "comeintime": time.Now().Unix(),
- "reason": reason,
- "spideruser": one["createuser"],
- "modifytime": one["modifytime"],
- }
- mgdb.Save("lua_logs_auditor", obj)
- }
- }
- }
- }
- return upresult, err
- }
- //保存记录信息
- func SaveRemark(taskid []string, reason, username string) {
- timeNow := time.Now().Unix()
- if reason == "" {
- reason = "审核通过"
- }
- for _, id := range taskid {
- query := bson.M{
- "_id": bson.ObjectIdHex(string(id)),
- }
- task := *mgdb.FindOne("task", query)
- if task != nil {
- checkData := task["a_check"]
- var checkArr []map[string]interface{}
- newData := make(map[string]interface{})
- newData["s_check_checkUser"] = username
- newData["l_check_checkTime"] = timeNow
- newData["s_check_checkRemark"] = reason
- if checkData != nil {
- myArr := qu.ObjArrToMapArr(checkData.([]interface{}))
- if myArr != nil && len(myArr) > 0 {
- for _, v := range myArr {
- checkArr = append(checkArr, v)
- }
- }
- }
- checkArr = append(checkArr, newData)
- task["a_check"] = checkArr
- mgdb.Update("task", query, map[string]interface{}{
- "$set": task,
- }, false, false)
- }
- }
- }
- //修改任务状态
- func UpTaskState(code []string, num int, reason string, startTime int64) {
- query := bson.M{}
- update := bson.M{}
- for _, v := range code {
- if num == 1 || num == 2 || num == 3 || num == 7 {
- query = bson.M{
- "_id": bson.ObjectIdHex(v),
- }
- } else {
- query = bson.M{
- "s_code": v,
- }
- }
- if num == 1 { //提交审核
- update = bson.M{
- "$set": bson.M{
- "i_state": 3,
- },
- }
- } else if num == 2 { //打回 -->未通过
- update = bson.M{
- "$set": bson.M{
- "i_state": 5,
- },
- }
- } else if num == 3 { //发布(审核通过) -->审核通过
- update = bson.M{
- "$set": bson.M{
- "i_state": 4,
- },
- }
- } else if num == 4 { //批量作废 -->关闭
- update = bson.M{
- "$set": bson.M{
- "i_state": 6,
- "l_complete": time.Now().Unix(),
- },
- }
- } else if num == 7 { //反馈信息 -->待审核
- newData := map[string]interface{}{
- "l_mrecord_comeintime": startTime,
- "l_mrecord_complete": time.Now().Unix(),
- "s_mrecord_remark": reason,
- }
- mrecord := []interface{}{}
- mrecord = append(mrecord, newData)
- update = bson.M{
- "$set": bson.M{
- "i_state": 3,
- "l_complete": time.Now().Unix(),
- "a_mrecord": mrecord,
- },
- }
- }
- flag := mgdb.Update("task", query, update, false, true)
- log.Println("codeOrId:", query, " 修改任务状态:", flag)
- }
- }
- //更新节点
- func (f *Front) ChangeEvent() {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != role_admin {
- f.ServeJson("没有权限")
- }
- code := f.GetString("code")
- event, _ := f.GetInt("event")
- eventok := false
- for k, _ := range util.Config.Uploadevents {
- if event == qu.Int64All(k) {
- eventok = true
- break
- }
- }
- if !eventok {
- f.ServeJson("没有对应节点")
- return
- }
- info := *mgdb.FindOne("luaconfig", `{"code":"`+code+`"}`)
- if len(info) > 0 {
- oldevent := qu.IntAll(info["event"])
- if qu.IntAll(info["state"]) == Sp_state_5 {
- //源节点下架
- _, err := spider.UpdateSpiderByCodeState(code, fmt.Sprint(Sp_state_6), oldevent)
- set := map[string]interface{}{
- "$set": map[string]interface{}{
- "event": qu.IntAll(event),
- "state": Sp_state_6,
- },
- }
- mgdb.Update("luaconfig", `{"code":"`+code+`"}`, set, true, false)
- if err != nil && strings.Contains(err.Error(), "timeout") {
- f.ServeJson("连接节点" + fmt.Sprint(oldevent) + "超时")
- } else {
- f.ServeJson(err.Error())
- }
- } else {
- set := map[string]interface{}{
- "$set": map[string]interface{}{
- "event": qu.IntAll(event),
- },
- }
- mgdb.Update("luaconfig", `{"code":"`+code+`"}`, set, true, false)
- }
- } else {
- f.ServeJson("没有对应记录")
- }
- }
- //验证用户是否有更改状态权限
- func IsHasUpState(auth, state int) bool {
- rep := false
- switch auth {
- case role_dev:
- if state == Sp_state_1 || state == Sp_state_7 {
- rep = true
- }
- case role_examine:
- if state == Sp_state_2 || state == Sp_state_3 {
- rep = true
- }
- case role_admin:
- rep = true
- default:
- }
- return rep
- }
- var list_fields = `{"_id":1,"code":1,"createuser":1,"modifyuser":1,"modifytime":1,"l_uploadtime":1,"l_checktime":1,"state":1,"param_common":1,"event":1,"urgency":1}`
- //脚本管理,结合爬虫运行信息
- func (f *Front) LuaList() {
- auth := qu.IntAll(f.GetSession("auth"))
- if auth != role_admin {
- f.ServeJson("没有权限!")
- return
- }
- if f.Method() == "POST" {
- state, _ := f.GetInteger("state")
- event, _ := f.GetInteger("event")
- start, _ := f.GetInteger("start")
- limit, _ := f.GetInteger("length")
- draw, _ := f.GetInteger("draw")
- searchStr := f.GetString("search[value]")
- //search := strings.Replace(searchStr, " ", "", -1)
- search := strings.TrimSpace(searchStr)
- query := bson.M{}
- q1 := bson.M{}
- q1["$or"] = []interface{}{
- bson.M{"code": bson.M{"$regex": search}},
- bson.M{"createuser": bson.M{"$regex": search}},
- bson.M{"param_common.1": bson.M{"$regex": search}},
- }
- q2 := bson.M{}
- if state > -1 {
- q2 = bson.M{"state": state}
- } else {
- q2["$or"] = []interface{}{
- bson.M{"state": Sp_state_3},
- bson.M{"state": Sp_state_5},
- bson.M{"state": Sp_state_6},
- }
- }
- q3 := bson.M{}
- if event > -1 {
- q3 = bson.M{"event": event}
- }
- if search != "" {
- query["$and"] = []interface{}{q1, q2, q3}
- } else {
- query["$and"] = []interface{}{q2, q3}
- }
- sort := `{"%s":%d}`
- orderIndex := f.GetString("order[0][column]")
- orderName := f.GetString(fmt.Sprintf("columns[%s][data]", orderIndex))
- orderType := 1
- if f.GetString("order[0][dir]") != "asc" {
- orderType = -1
- }
- sort = fmt.Sprintf(sort, orderName, orderType)
- page := start / 10
- luas := *mgdb.Find("luaconfig", query, sort, list_fields, false, start, limit)
- count := mgdb.Count("luaconfig", query)
- for k, v := range luas {
- v["num"] = k + 1 + page*10
- l_uploadtime := qu.Int64All(v["l_uploadtime"])
- v["l_uploadtime"] = qu.FormatDateByInt64(&l_uploadtime, qu.Date_Full_Layout)
- l_checktime := qu.Int64All(v["l_checktime"])
- v["l_checktime"] = qu.FormatDateByInt64(&l_checktime, qu.Date_Full_Layout)
- if l_checktime > 0 { //核对
- v["is_check"] = true
- } else { //未核对
- v["is_check"] = false
- }
- if tmp, ok := spinfos.Load(v["code"]); ok {
- info := tmp.(*spinfo)
- v["modifytime"] = info.lastHeartbeat
- v["yesterday"] = fmt.Sprint(info.yesterdayDowncount) + "/" + fmt.Sprint(info.yestoDayRequestNum)
- v["terday"] = fmt.Sprint(info.todayDowncount) + "/" + fmt.Sprint(info.toDayRequestNum)
- v["lastdowncount"] = info.lastDowncount
- v["lstate"] = info.lstate
- } else {
- v["modifytime"] = ""
- v["yesterday"] = ""
- v["terday"] = ""
- v["lastdowncount"] = 0
- v["lstate"] = ""
- }
- }
- f.ServeJson(map[string]interface{}{"draw": draw, "data": luas, "recordsFiltered": count, "recordsTotal": count})
- } else {
- events := []string{}
- for k, _ := range util.Config.Uploadevents {
- events = append(events, k)
- }
- sort.Strings(events)
- f.T["events"] = events
- f.Render("lualist.html", &f.T)
- }
- }
// spinfo is the runtime snapshot of one spider as decoded by SpiderInfo and
// displayed by LuaList.
type spinfo struct {
	// code identifies the spider script; it is the key used in spinfos.
	code string

	// Download / request counters for today, yesterday and in total.
	todayDowncount     int
	toDayRequestNum    int
	yesterdayDowncount int
	yestoDayRequestNum int
	totalDowncount     int
	totalRequestNum    int

	// Remaining counters reported by the node.
	// NOTE(review): exact meaning is defined by the sender — confirm.
	errorNum      int
	roundCount    int
	runRate       int
	lastDowncount int

	// lastHeartbeat is the reported heartbeat time, already formatted as text.
	lastHeartbeat string
	// lstate is the state string reported for the spider.
	lstate string
}
- //爬虫信息
- func SpiderInfo(data string) {
- data = util.Se.DecodeString(data)
- infos := []map[string]interface{}{}
- err := json.Unmarshal([]byte(data), &infos)
- if err != nil {
- return
- }
- for _, tmp := range infos {
- lastHeartbeat := qu.Int64All(tmp["lastHeartbeat"])
- info := &spinfo{
- code: fmt.Sprint(tmp["code"]),
- todayDowncount: qu.IntAll(tmp["todayDowncount"]),
- toDayRequestNum: qu.IntAll(tmp["toDayRequestNum"]),
- yesterdayDowncount: qu.IntAll(tmp["yesterdayDowncount"]),
- yestoDayRequestNum: qu.IntAll(tmp["yestoDayRequestNum"]),
- totalDowncount: qu.IntAll(tmp["totalDowncount"]),
- totalRequestNum: qu.IntAll(tmp["totalRequestNum"]),
- errorNum: qu.IntAll(tmp["errorNum"]),
- roundCount: qu.IntAll(tmp["roundCount"]),
- runRate: qu.IntAll(tmp["runRate"]),
- lastHeartbeat: qu.FormatDateByInt64(&lastHeartbeat, qu.Date_Full_Layout),
- lastDowncount: qu.IntAll(tmp["lastDowncount"]),
- lstate: fmt.Sprint(tmp["lstate"]),
- }
- spinfos.Store(info.code, info)
- //log.Println(info)
- }
- }
- //接受维护任务信息
- func SpiderModifyTask(data string) {
- data = util.Se.DecodeString(data)
- mtasks := []map[string]interface{}{}
- err := json.Unmarshal([]byte(data), &mtasks)
- if err != nil {
- return
- }
- for k, tmp := range mtasks {
- log.Println(k, tmp)
- }
- }
- //查看是否有该任务
- func checkTask(codes []string, num int) []string {
- // var id string = ""
- query := bson.M{}
- var idArr []string
- if len(codes) > 0 {
- for _, v := range codes {
- if num == 1 {
- query = bson.M{
- "s_code": v,
- "i_state": bson.M{
- "$in": []int{1, 2, 5},
- },
- }
- } else if num == 2 { //打回时查询待审核的任务
- query = bson.M{
- "s_code": v,
- "i_state": 3,
- }
- } else if num == 3 { //审核通过时查询待处理、处理中、待审核、未通过的任务
- query = bson.M{
- "s_code": v,
- "i_state": bson.M{
- "$in": []int{1, 2, 3, 5},
- },
- }
- } else if num == 7 {
- query = bson.M{
- "s_code": v,
- "i_state": bson.M{
- "$in": []int{2, 5},
- },
- }
- }
- //task := *mgdb.FindOne("task", query)
- task := *mgdb.Find("task", query, nil, nil, false, -1, -1)
- if task != nil {
- for _, v := range task {
- //id = v["_id"].(bson.ObjectId).Hex()
- idArr = append(idArr, v["_id"].(bson.ObjectId).Hex())
- }
- }
- return idArr
- }
- }
- return idArr
- }
|