package front

import (
	"encoding/json"
	"errors"
	"fmt"
	"log"
	mu "mfw/util"
	"mongodb"
	qu "qfw/util"
	"sort"
	"spider"
	sp "spiderutil"
	"strconv"
	"strings"
	"time"
	u "util"
)

type Base struct {
	SpiderCode             string
	SpiderCodeOld          string
	SpiderName             string
	SpiderChannel          string
	SpiderDownDetailPage   bool
	SpiderStartPage        int
	SpiderMaxPage          int
	SpiderRunRate          int
	Spider2Collection      string
	SpiderPageEncoding     string
	SpiderStoreMode        int //1,2
	SpiderStoreToMsgEvent  int
	SpiderTargetChannelUrl string
	SpiderLastDownloadTime string
	SpiderIsHistoricalMend bool
	SpiderIsMustDownload   bool
}

type Step1 struct {
	Address        string
	ContentChooser string
	DateFormat     string
	Expert         string
	Types          int
}

type Step2 struct {
	Listadd        string
	Listadds       string
	BlockChooser   string
	AddressChooser string
	TitleChooser   string
	DateChooser    string
	DateFormat     string
	Expert         string
	Types          int
	//chromedp相关
	Chrome      string
	RangeChrome string
	RangeTimes  int
}

type Step3 struct {
	ContentChooser string
	ElementChooser string
	T_title        string
	T_href         string
	T_date         string
	Expert         string
	Types          int
	Chrome         string
}

type StepRe3 struct {
	Checked bool
	Expert  string
}

type OtherBase struct {
	IsFlow               int    //爬虫所采集数据是否参与数据流程标识
	SpiderType           string //爬虫类型:increment增量;history历史
	SpiderHistoryMaxPage int    //采集历史数据时的采集最大页
	SpiderMoveEvent      string //爬虫采集完历史后要转移到的节点 comm:队列模式、bid:高性能模式
}

// 加载某个爬虫
func (f *Front) LoadSpider(codeTaskIdReState string) error {
	tmpStr := strings.Split(codeTaskIdReState, "__")
	code := tmpStr[0]
	text := tmpStr[1]
	auth := qu.IntAll(f.GetSession("auth"))
	restate := -1
	if text == "1" { //重采编辑
		restate = 1
	} else if text == "2" {
		restate = 2
	} else if text == "3" {
		restate = 3
	} else if text != "bu" {
		if auth == u.Role_Dev && qu.ObjToString(f.GetSession(text)) == "" {
			xgTime := time.Unix(time.Now().Unix(), 0).Format("2006-01-02 15:04:05")
			f.SetSession(text, xgTime)
		}
	}
	copy := f.GetString("copy")
	if f.Method() == "GET" {
		code := sp.Se.Decode4Hex(code)
		f.T["actiontext"] = "编辑"
		//lua, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
		lua, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
		//爬虫开发人员编辑爬虫,修改爬虫信息
		if auth == u.Role_Dev && text != "bu" && qu.ObjToString((*lua)["spidertype"]) == "increment" {
			(*lua)["spidertype"] = "history"
			(*lua)["incrementevent"] = (*lua)["event"]
		}
		if qu.ObjToString((*lua)["createuserid"]) == f.GetSession("userid").(string) || auth >= 1 {
			if len(*lua) > 0 {
				if qu.IntAll((*lua)["event"]) == 7000 && qu.IntAll((*lua)["urgency"]) == 0 && qu.IntAll((*lua)["state"]) == 0 {
					q := map[string]interface{}{
						"event":        7000,
						"state":        0,
						"urgency":      1,
						"modifyuserid": f.GetSession("userid"),
					}
					if u.MgoEB.Count("luaconfig", q) > 0 {
						f.Write("名下还有7000节点待完成的紧急爬虫,暂无法处理该爬虫!")
						return nil
					}
				}
				if copy != "" {
					//luacopy, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": copy})
					luacopy, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": copy})
					if len(*luacopy) > 0 {
						(*lua)["model"] = (*luacopy)["model"]
						common_copy := (*luacopy)["param_common"].([]interface{})
						common := (*lua)["param_common"].([]interface{})
						common_copy[0] = common[0]
						common_copy[1] = common[1]
						common_copy[2] = common[2]
						common_copy[11] = common[11]
						(*lua)["param_common"] = (*luacopy)["param_common"]
						(*lua)["param_time"] = (*luacopy)["param_time"]
						(*lua)["param_list"] = (*luacopy)["param_list"]
						(*lua)["param_content"] = (*luacopy)["param_content"]
						(*lua)["str_list"] = (*luacopy)["str_list"]
						(*lua)["str_time"] = (*luacopy)["str_time"]
						(*lua)["str_content"] = (*luacopy)["str_content"]
						(*lua)["Thref"] = (*luacopy)["Thref"]
(*lua)["Tpublishtime"] = (*luacopy)["Tpublishtime"] (*lua)["Ttitle"] = (*luacopy)["Ttitle"] (*lua)["Tdate"] = (*luacopy)["Tdate"] (*lua)["type_content"] = (*luacopy)["type_content"] (*lua)["type_list"] = (*luacopy)["type_list"] (*lua)["type_time"] = (*luacopy)["type_time"] } } if (*lua)["listcheck"] != nil { listcheck := (*lua)["listcheck"].(string) listcheck = strings.Replace(listcheck, "\\n", "\n", -1) listcheck = strings.Replace(listcheck, "\\", "", -1) (*lua)["listcheck"] = listcheck } if (*lua)["contentcheck"] != nil { contentcheck := (*lua)["contentcheck"].(string) contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1) contentcheck = strings.Replace(contentcheck, "\\", "", -1) (*lua)["contentcheck"] = contentcheck } js, _ := json.MarshalIndent((*lua)["model"], "", " ") (*lua)["js"] = string(js) f.T["lua"] = lua f.T["taskId"] = text f.T["restate"] = restate f.T["isflow"] = (*lua)["isflow"] f.T["spidertype"] = (*lua)["spidertype"] f.T["spidermovevent"] = (*lua)["spidermovevent"] f.T["spiderhistorymaxpage"] = (*lua)["spiderhistorymaxpage"] f.T["spiderremark"] = (*lua)["spiderremark"] f.T["identity"] = f.GetSession("identity") events := []string{} for k, _ := range sp.Config.Uploadevents { events = append(events, k) } sort.Strings(events) f.T["events"] = events if (*lua)["oldlua"] != nil { return f.Render("oldedit.html", &f.T) } f.T["areas"] = u.Area // f.T["citys"] = u.City // f.T["provinces"] = u.Province // //查询爬虫是否有列表页处理中的异常任务 f.T["clearchannel"] = u.MgoEB.Count("task", map[string]interface{}{"s_code": code, "s_type": "1", "i_state": 2}) > 0 return f.Render("spideredit.html", &f.T) } } else { f.Write("您没有编辑他人脚本的权限") } } return nil } // 查看某个爬虫 func (f *Front) ViewSpider(id string) error { auth := qu.IntAll(f.GetSession("auth")) if auth >= 1 { if f.Method() == "GET" { code := sp.Se.Decode4Hex(id) f.T["actiontext"] = "编辑" //lua, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code}) lua, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code}) if len(*lua) > 0 { if (*lua)["listcheck"] != nil { listcheck := (*lua)["listcheck"].(string) listcheck = strings.Replace(listcheck, "\\n", "\n", -1) listcheck = strings.Replace(listcheck, "\\", "", -1) (*lua)["listcheck"] = listcheck } if (*lua)["contentcheck"] != nil { contentcheck := (*lua)["contentcheck"].(string) contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1) contentcheck = strings.Replace(contentcheck, "\\", "", -1) (*lua)["contentcheck"] = contentcheck } js, _ := json.MarshalIndent((*lua)["model"], "", " ") (*lua)["js"] = string(js) f.T["lua"] = lua f.T["isflow"] = (*lua)["isflow"] f.T["spidertype"] = (*lua)["spidertype"] f.T["spidermovevent"] = (*lua)["spidermovevent"] f.T["spiderhistorymaxpage"] = (*lua)["spiderhistorymaxpage"] events := []string{} for k, _ := range sp.Config.Uploadevents { events = append(events, k) } sort.Strings(events) f.T["events"] = events if (*lua)["oldlua"] != nil { return f.Render("oldedit.html", &f.T) } f.T["areas"] = u.Area // f.T["citys"] = u.City // f.T["provinces"] = u.Province // f.T["spiderremark"] = (*lua)["spiderremark"] return f.Render("spiderview.html", &f.T) } else { f.Write("没有对应记录!") return nil } } return f.Redirect("/center") } else { f.Write("您没有查看他人脚本的权限") return nil } } func (f *Front) LoadModel(id string) error { if f.Method() == "GET" { //lua, _ := u.MgoE.Find("luaconfig", map[string]interface{}{"code": id}, nil, map[string]interface{}{"model": 1}, true, -1, -1) lua, _ := u.MgoEB.Find("luaconfig", map[string]interface{}{"code": id}, nil, 
map[string]interface{}{"model": 1}, true, -1, -1) if len(*lua) > 0 { f.ServeJson((*lua)[0]) } } return f.Redirect("/center") } func (f *Front) SaveStep() { rep := map[string]interface{}{} if f.Step2.Types == 2 || f.Step3.Types == 2 { //chrome模式只支持转成专家模式保存 rep["msg"] = "爬虫不支持chrome模式保存" f.ServeJson(rep) return } userid, _ := f.GetSession("userid").(string) auth := qu.IntAll(f.GetSession("auth")) if f.GetString("oldlua") != "" { id := f.GetString("code") //one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": id}) one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": id}) id = qu.ObjToString((*one)["code"]) script := f.GetStringComm("script") if strings.Index(script, id) == -1 { rep["msg"] = "code/名称都不能更改" f.ServeJson(rep) return } else { upset := map[string]interface{}{"luacontent": script} upset["modifytime"] = time.Now().Unix() //b := u.MgoE.Update("luaconfig", map[string]interface{}{"code": id}, map[string]interface{}{"$set": upset}, true, false) b := u.MgoEB.Update("luaconfig", map[string]interface{}{"code": id}, map[string]interface{}{"$set": upset}, true, false) if b { rep["msg"] = "保存成功" rep["code"] = sp.Se.Encode2Hex(id) f.ServeJson(rep) return } } } else { if f.Base.SpiderName != "" && f.Base.SpiderCode != "" { code := f.Base.SpiderCode //one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": f.Base.SpiderCode}) one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": f.Base.SpiderCode}) //记录上架操作前的第一次保存时的爬虫历史 user := f.GetSession("username").(string) LuaSaveLog(f.Base.SpiderCode, user, one, 0) state := qu.IntAllDef((*one)["state"], 0) restate := qu.IntAll((*one)["restate"]) infoformat := qu.IntAll((*one)["infoformat"]) comeintime := time.Now().Unix() if len((*one)) > 0 { comeintime = qu.Int64All((*one)["comeintime"]) ouserid := qu.ObjToString((*one)["createuserid"]) if ouserid != userid && auth == u.Role_Dev { f.Write("权限不够,不能修改他人脚本") return } else { code = qu.ObjToString((*one)["code"]) f.Base.SpiderCode = code f.Base.SpiderName = ((*one)["param_common"].([]interface{}))[1].(string) } } else { if auth != u.Role_Admin { f.Write("不能新建爬虫,请联系管理员导入") return } } listcheck := f.GetString("listcheck") contentcheck := f.GetString("contentcheck") if auth == u.Role_Dev { //f.Base.SpiderStoreToMsgEvent = 4002 } common := []interface{}{ f.Base.SpiderCode, f.Base.SpiderName, f.Base.SpiderChannel, f.Base.SpiderDownDetailPage, f.Base.SpiderStartPage, f.Base.SpiderMaxPage, f.Base.SpiderRunRate, f.Base.Spider2Collection, f.Base.SpiderPageEncoding, f.Base.SpiderStoreMode, f.Base.SpiderStoreToMsgEvent, f.Base.SpiderTargetChannelUrl, f.Base.SpiderLastDownloadTime, f.Base.SpiderIsHistoricalMend, f.Base.SpiderIsMustDownload, } ptime := []interface{}{ f.Step1.DateFormat, f.Step1.Address, f.Step1.ContentChooser, } list := []interface{}{ f.Step2.Listadd, f.Step2.Listadds, f.Step2.BlockChooser, f.Step2.AddressChooser, f.Step2.TitleChooser, f.Step2.DateChooser, f.Step2.DateFormat, } content := []interface{}{ f.Step3.ContentChooser, f.Step3.ElementChooser, } param_list_chrome, param_list_rangechrome, param_content_chrome := []sp.ChromeActions{}, []sp.ChromeActions{}, []sp.ChromeActions{} json.Unmarshal([]byte(f.Step2.Chrome), ¶m_list_chrome) json.Unmarshal([]byte(f.Step2.RangeChrome), ¶m_list_rangechrome) json.Unmarshal([]byte(f.Step3.Chrome), ¶m_content_chrome) param := map[string]interface{}{} common[4] = 1 param["param_common"] = common param["channel"] = f.Base.SpiderChannel param["href"] = f.Base.SpiderTargetChannelUrl //向导模式 param["param_time"] = 
ptime param["param_list"] = list param["param_list_chrome"] = param_list_chrome param["param_list_rangechrome"] = param_list_rangechrome param["param_list_rangetimes"] = f.Step2.RangeTimes param["param_content_chrome"] = param_content_chrome param["param_content"] = content param["type_time"] = f.Step1.Types param["type_list"] = f.Step2.Types param["type_content"] = f.Step3.Types //专家模式 param["str_time"] = f.Step1.Expert param["str_list"] = f.Step2.Expert param["str_list_chrome"] = f.Step2.Chrome param["str_list_rangechrome"] = f.Step2.RangeChrome param["str_content_chrome"] = f.Step3.Chrome param["str_content"] = f.Step3.Expert param["comeintime"] = comeintime listcheck = strings.Replace(listcheck, "\n", "\\\\n", -1) param["listcheck"] = strings.Replace(listcheck, "\"", "\\\\\"", -1) contentcheck = strings.Replace(contentcheck, "\n", "\\\\n", -1) param["contentcheck"] = strings.Replace(contentcheck, "\"", "\\\\\"", -1) //补充模型 s_model := f.GetString("model") configModel := sp.Config.Model[s_model] model := map[string]interface{}{} for k, _ := range configModel { model[k] = f.GetString(k) } model["model"] = s_model param["code"] = f.Base.SpiderCode param["model"] = model if len((*one)) > 0 { param["createuser"] = (*one)["createuser"] param["createuserid"] = (*one)["createuserid"] param["code"] = (*one)["code"] //开发员关联任务修改爬虫状态 state = qu.IntAll((*one)["state"]) if auth == u.Role_Dev && state >= Sp_state_3 && restate != 1 { //开发员修改,已经审核通过(不包含已上架),状态重置为待完成(restate!=1判断,重采修改保存爬虫时不修改爬虫状态) param["state"] = 0 } else { param["state"] = state } } else { param["createuser"] = f.GetSession("loginuser") param["createuserid"] = f.GetSession("userid") param["createuseremail"] = f.GetSession("email") param["next"] = f.GetSession("email") param["state"] = 0 } if qu.ObjToString((*one)["modifyuser"]) == "" { param["modifyuser"] = param["createuser"] param["modifyuserid"] = param["createuserid"] } param["modifytime"] = time.Now().Unix() param["Ttitle"] = f.Step3.T_title param["Thref"] = f.Step3.T_href param["Tdate"] = f.Step3.T_date //其他信息 param["isflow"] = f.OtherBase.IsFlow param["spidertype"] = f.OtherBase.SpiderType param["spiderhistorymaxpage"] = f.OtherBase.SpiderHistoryMaxPage tmpEvent, err := strconv.Atoi(f.OtherBase.SpiderMoveEvent) //f.OtherBase.SpiderMoveEvent此处SpiderMoveEvent已不表示comm、bid,表示增量的节点 msgResult := map[string]string{} if f.Base.SpiderMaxPage == 1 { msgResult["warn"] = "提醒,增量采集页过小,请再次核对!" } //其他校验 if f.Base.SpiderChannel == "" { msgResult["err"] = "栏目名称为空!" } if f.Base.SpiderTargetChannelUrl == "" { msgResult["err"] = "栏目地址为空!;" + msgResult["err"] } if f.OtherBase.SpiderType == "history" { //爬虫类型是history的放到7000节点,并记录历史节点 var historyMaxPageWarn string if f.OtherBase.SpiderHistoryMaxPage > 500 { //新爬虫跑历史超过500页提醒新建复制爬虫 historyMaxPageWarn = "提醒,历史页过大,考虑复制爬虫!" } else if f.OtherBase.SpiderHistoryMaxPage == 1 { historyMaxPageWarn = "提醒,历史页过小,请再次核对!" 
				}
				if historyMaxPageWarn != "" {
					msgResult["warn"] = historyMaxPageWarn + ";" + msgResult["warn"]
				}
				param["event"] = 7000
				if err == nil {
					param["incrementevent"] = tmpEvent //开发人员切换增量节点
				} else if event := qu.IntAll((*one)["event"]); event != 7000 { //默认增量节点
					param["incrementevent"] = event
				}
				param["urgency"] = 1 //保存到7000时,爬虫紧急度变为紧急(控制7000节点爬虫紧急未写完不能写普通)
				if (*one)["downevent"] == nil {
					param["downevent"] = qu.IntAll((*one)["event"]) //当前节点
				}
			} else if f.OtherBase.SpiderType == "increment" && err == nil { //增量
				param["event"] = tmpEvent //开发人员切换增量节点
				//开发人员修改爬虫节点后,在审核人员上架时,要在原来的节点下架,临时记录要下架的节点downevent
				if event := qu.IntAll((*one)["event"]); event != tmpEvent && (*one)["downevent"] == nil {
					param["downevent"] = event
				}
			}
			if movevent, ok := sp.Config.Uploadevents[f.OtherBase.SpiderMoveEvent].(string); ok && movevent != "" {
				param["spidermovevent"] = movevent
			}
			//三级页复制
			param["str_recontent"] = f.StepRe3.Expert
			param["iscopycontent"] = f.StepRe3.Checked
			// param["listisfilter"] = u.ListFilterReg.MatchString(f.Step2.Expert) //列表页校验是否含“--关键词过滤”
			param["projecthref"] = ProjectHrefReg.MatchString(f.Step2.Expert) || ProjectHrefReg.MatchString(f.Step3.Expert)
			checkLua := LuaTextCheck(infoformat, param, param_list_chrome, param_list_rangechrome, param_content_chrome, msgResult)
			//checkLua := LuaTextCheck(f.Base.SpiderCode, f.Step2.Expert, f.Step3.Expert, f.Step2.Types, infoformat, model, msgResult)
			if !checkLua {
				issave := spider.SaveSpider(code, param) //保存脚本
				if issave {
					for k, v := range *one {
						if k != "_id" && param[k] == nil {
							param[k] = v
						}
					}
					Wlog(f.Base.SpiderName, f.Base.SpiderCode, user, f.GetSession("userid").(string), "修改", param)
					if warn := msgResult["warn"]; warn != "" {
						rep["msg"] = "保存成功;" + msgResult["warn"]
					}
					rep["ok"] = true
				} else {
					rep["msg"] = "保存失败"
					rep["ok"] = false
				}
			} else {
				rep["msg"] = "保存失败," + msgResult["err"]
				rep["ok"] = false
			}
			rep["code"] = sp.Se.Encode2Hex(code)
			f.ServeJson(rep)
		}
	}
}

// SaveJs 保存列表页/详情页的js处理脚本
func (f *Front) SaveJs() {
	//param_type := f.GetString("param_type")
	//return_type := f.GetString("return_type")
	js_name := f.GetString("js_name")
	js_alias := f.GetString("js_alias")
	jstext := f.GetString("jstext")
	step := f.GetString("step")
	code := f.GetString("code")
	update := map[string]interface{}{
		"runjs": true,
	}
	js := map[string]interface{}{
		"js_text": jstext,
		"js_name": js_name,
		"js_step": step,
		"js_alias": js_alias,
		"js_param": "string",
		"js_return": "string",
	}
	if step == "list" {
		update["js_list"] = js
	} else if step == "detail" {
		update["js_detail"] = js
	}
	u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": update}, false, false)
	f.ServeJson(map[string]interface{}{"ok": true})
}

// LuaCheckSaveLog 保存机检日志
func LuaCheckSaveLog(lua *map[string]interface{}) {
	u.MgoEB.Save("luachecklog", map[string]interface{}{
		"code":       (*lua)["code"],
		"check":      (*lua)["check"],
		"checkok":    (*lua)["checkok"],
		"checktime":  (*lua)["checktime"],
		"comeintime": time.Now().Unix(),
	})
}

// UpdateSiteTask 将重点站点预警任务标记为已修复
func UpdateSiteTask(code string) {
	query := map[string]interface{}{
		"b_repair":     false,
		"s_spidercode": code,
	}
	set := map[string]interface{}{
		"b_repair":      true,
		"l_update_time": time.Now().Unix(),
	}
	u.MgoEB.Update("spider_important_warning", query, map[string]interface{}{"$set": set}, false, true)
}

// LuaSaveLog 记录爬虫保存快照(stype=0)或对比上架前后的字段变更(stype=1)
func LuaSaveLog(code, user string, data *map[string]interface{}, stype int) {
	saveOne, _ := u.MgoEB.FindOne("luasavelog", map[string]interface{}{"state": 0, "code": code})
	if stype == 0 { //保存记录
		if len(*saveOne) == 0 && len(*data) > 0 { //重新记录
			delete(*data, "_id")
			save := map[string]interface{}{
map[string]interface{}{ "code": code, "state": 0, "saveuser": user, "comeintime": time.Now().Unix(), "luaold": data, } u.MgoEB.Save("luasavelog", save) } } else if stype == 1 { //对比 if len(*saveOne) > 0 { tmp := (*saveOne)["luaold"].(map[string]interface{}) updateMap := map[string]interface{}{} //记录字段改变值 for k, v := range *data { if k != "_id" && k != "state" && k != "modifytime" { if tmpV := tmp[k]; tmpV != nil { //历史记录存在字段 tmpJson, _ := json.Marshal(tmpV) dataJson, _ := json.Marshal(v) if string(tmpJson) != string(dataJson) { updateMap[k] = v } delete(tmp, k) //删除对比过的字段 } else { //历史记录不存在字段 updateMap[k] = v } } } if len(tmp) > 0 { for k, _ := range tmp { //上架时爬虫较历史爬虫少的字段信息 updateMap[k] = nil } } set := map[string]interface{}{"state": 1, "updatetime": time.Now().Unix(), "updateuser": user} if len(updateMap) > 0 { //有字段改变 set["luaupdate"] = updateMap set["lusnew"] = data } u.MgoEB.UpdateById("luasavelog", (*saveOne)["_id"], map[string]interface{}{"$set": set}) } } } /* 爬虫保存时,检查列表页和三级页代码中是否含lua原生方法 func LuaTextCheck(code, list, detail string, type_list, infoformat int, model map[string]interface{}, msgResult map[string]string) bool { defer qu.Catch() //1、异常校验 var errmsg, warnmsg string if LuaReg.MatchString(list) || LuaReg.MatchString(detail) { errmsg += "代码中含有lua原生方法;" } if ListFilterReg.MatchString(detail) && !strings.Contains(detail, "delete") { //三级页含过滤但是没有data["delete"]="true" errmsg += `三级页缺少data["delete"]="true";` } sln_reg := regexp.MustCompile(`sendListNum\(pageno,list\)`) slnIndexArr := sln_reg.FindAllStringIndex(list, -1) if type_list != 0 && len(slnIndexArr) == 0 { //列表页专家模式且不含sendListNum errmsg += "代码中缺少sendListNum(pageno,list)方法;" } else if type_list == 1 && len(slnIndexArr) > 0 { //判断sendListNum方法的位置 trim_reg := regexp.MustCompile("trim") insert_reg := regexp.MustCompile("insert") trIndexArr := trim_reg.FindAllStringIndex(list, -1) irIndexArr := insert_reg.FindAllStringIndex(list, -1) slIndex := slnIndexArr[len(slnIndexArr)-1] //sendListNum位置 trIndex := trIndexArr[len(trIndexArr)-1] //com.trim位置 irIndex := irIndexArr[len(irIndexArr)-1] //insert位置 qu.Debug("sendListNum位置:", trIndex, slIndex, irIndex) if slIndex[1] < trIndex[0] || slIndex[0] > irIndex[1] { //sendListNum方法必须在com.trim方法后,table.insert方法前 errmsg += "sendListNum方法位置错误;" } } if type_list == 1 { //校验列表页area、city、distric if !strings.Contains(list, "area") { errmsg += `模板item["area"]不存在;` } if !strings.Contains(list, "city") { errmsg += `模板item["city"]不存在;` } if !strings.Contains(list, "district") { errmsg += `模板item["district"]不存在;` } area := qu.ObjToString(model["area"]) city := qu.ObjToString(model["city"]) district := qu.ObjToString(model["district"]) if area != "" && !strings.Contains(list, area) { errmsg += "省份信息与模板不一致;" } if city != "" && !strings.Contains(list, city) { errmsg += "城市信息与模板不一致;" } if district != "" && !strings.Contains(list, district) { errmsg += "区/县信息与模板不一致;" } if infoformat == 2 && !strings.Contains(detail, "projectname") { errmsg += "拟建/审批数据缺少projectname字段;" } //校验爬虫代码的一致性 if !strings.Contains(list, code) { errmsg += `模板item["spidercode"]值错误;` } } //2、提醒校验 if !strings.Contains(detail, "downloadFile") && !strings.Contains(detail, "getFileAttachmentsArrayWithTag") { warnmsg += "三级页缺少下载附件方法;" } msgResult["warn"] += warnmsg msgResult["err"] = errmsg return errmsg != "" } */ func LuaTextCheck(infoformat int, param map[string]interface{}, param_list_chrome, param_list_rangechrome, param_content_chrome []sp.ChromeActions, msgResult map[string]string) bool { defer qu.Catch() list := 
qu.ObjToString(param["str_list"]) detail := qu.ObjToString(param["str_content"]) type_list := qu.IntAll(param["type_list"]) type_content := qu.IntAll(param["type_content"]) model, _ := param["model"].(map[string]interface{}) var errmsg, warnmsg string if qu.IntAll(param["type_list"]) == 0 { errmsg += "列表页非专家模式;" } if qu.IntAll(param["type_content"]) == 0 { errmsg += "详情页非专家模式;" } //1、异常校验 if type_list == 1 { if u.LuaReg.MatchString(list) { errmsg += "列表页代码中含有lua原生方法;" } if strings.Contains(list, "downloadByChrome") { //chrome下载方法动作参数判断 for _, act := range param_list_chrome { if act.Action != "changeip" && act.Param == "" { errmsg += "列表页chrome模式'" + act.Action + "'基础动作未填写参数,填写后注意Ctrl+F10重新插入代码;" break } } for _, act := range param_list_rangechrome { if act.Action != "changeip" && act.Param == "" { errmsg += "列表页chrome模式'" + act.Action + "'循环动作未填写参数,填写后注意Ctrl+F10重新插入代码;" break } } } if strings.Contains(list, `item["title"]="a"`) { if !strings.Contains(detail, `data["title"]`) { errmsg += "检查代码title的完整性;" } } if strings.Contains(list, "stringFind") && !strings.Contains(list, "--关键词过滤") { errmsg += "列表页代码有过滤方法stringFind但缺少注释:--关键词过滤;" } slnIndexArr := u.SendListNumReg.FindAllStringIndex(list, -1) if len(slnIndexArr) == 0 { //列表页专家模式且不含sendListNum errmsg += "代码中缺少sendListNum(pageno,list)方法;" } else if len(slnIndexArr) > 0 { //判断sendListNum方法的位置 trIndexArr := u.TrimReg.FindAllStringIndex(list, -1) irIndexArr := u.InsertReg.FindAllStringIndex(list, -1) slIndex := slnIndexArr[len(slnIndexArr)-1] //sendListNum位置 trIndex := trIndexArr[len(trIndexArr)-1] //com.trim位置 irIndex := irIndexArr[len(irIndexArr)-1] //insert位置 qu.Debug("sendListNum位置:", trIndex, slIndex, irIndex) if slIndex[1] < trIndex[0] || slIndex[0] > irIndex[1] { //sendListNum方法必须在com.trim方法后,table.insert方法前 errmsg += "sendListNum方法位置错误;" } } if param, ok := param["param_common"].([]interface{}); ok && len(param) >= 3 { spidercode := qu.ObjToString(param[0]) site := qu.ObjToString(param[1]) channel := qu.ObjToString(param[2]) if !strings.Contains(list, fmt.Sprintf(u.CheckText_Code, spidercode)) { errmsg += `爬虫代码的值与模板不一致;` } if !strings.Contains(list, fmt.Sprintf(u.CheckText_Site, site)) { errmsg += `站点的值与模板不一致;` } if !strings.Contains(list, fmt.Sprintf(u.CheckText_Channel, channel)) { warnmsg += `栏目的值与模板不一致;` } } //校验列表页area、city、distric area := qu.ObjToString(model["area"]) city := qu.ObjToString(model["city"]) district := qu.ObjToString(model["district"]) if !strings.Contains(list, fmt.Sprintf(u.CheckText_Area+`="%s"`, area)) { errmsg += `省份信息与模板不一致` } if !strings.Contains(list, fmt.Sprintf(u.CheckText_City+`="%s"`, city)) { errmsg += `城市信息与模板不一致` } if !strings.Contains(list, fmt.Sprintf(u.CheckText_District+`="%s"`, district)) { errmsg += `区/县信息与模板不一致` } if infoformat == 2 && !strings.Contains(detail, "projectname") { errmsg += "拟建/审批数据缺少projectname字段;" } //校验爬虫代码的一致性 //if !strings.Contains(list, code) { // errmsg += `模板item["spidercode"]值错误;` //} isHttps := false for _, text := range u.DomainNameReg.FindAllString(list, -1) { if strings.Contains(text, "https") { isHttps = true } } if isHttps { for tmpStr, tmpText := range map[string]string{"列表页": list, "三级页": detail} { downLoadText := u.DownLoadReg.FindString(tmpText) if downLoadText != "" { textArr := strings.Split(downLoadText, ",") if len(textArr) < 4 { errmsg += "download方法添加下载参数;" } else if len(textArr) == 4 { if !u.CodeTypeReg.MatchString(textArr[0]) || (textArr[1] != "true" && textArr[1] != "false") { errmsg += tmpStr + "download方法添加下载参数;" } } } } } } if type_content == 1 { if 
		if u.LuaReg.MatchString(detail) {
			errmsg += "详情页代码中含有lua原生方法;"
		}
		if u.ListFilterReg.MatchString(detail) && !strings.Contains(detail, "delete") { //三级页含过滤但是没有data["delete"]="true"
			warnmsg += `详情页缺少data["delete"]="true";`
		}
		if !strings.Contains(detail, "s_title") {
			errmsg += "详情页缺少s_title;"
		}
		if strings.Contains(detail, "downloadByChrome") { //chrome下载方法动作参数判断
			for _, act := range param_content_chrome {
				if act.Action != "changeip" && act.Param == "" {
					errmsg += "详情页chrome模式'" + act.Action + "'动作未填写参数,填写后注意Ctrl+F10重新插入代码;"
				}
			}
		}
	}
	//2、提醒校验
	if !strings.Contains(detail, "downloadFile") && !strings.Contains(detail, "getFileAttachmentsArrayWithTag") {
		warnmsg += "详情页缺少下载附件方法;"
	}
	msgResult["warn"] += warnmsg
	msgResult["err"] += errmsg
	return msgResult["err"] != ""
}

// 方法测试
func (f *Front) RunStep() {
	imodal, _ := f.GetInteger("imodal")
	script, _ := f.GetBool("script")
	listcheck := f.GetString("listcheck")
	contentcheck := f.GetString("contentcheck")
	downloadnode := f.GetString("downloadnode") //下载节点
	common := []interface{}{
		f.Base.SpiderCode,
		f.Base.SpiderName,
		f.Base.SpiderChannel,
		f.Base.SpiderDownDetailPage,
		f.Base.SpiderStartPage,
		f.Base.SpiderMaxPage,
		f.Base.SpiderRunRate,
		f.Base.Spider2Collection,
		f.Base.SpiderPageEncoding,
		f.Base.SpiderStoreMode,
		f.Base.SpiderStoreToMsgEvent,
		f.Base.SpiderTargetChannelUrl,
		f.Base.SpiderLastDownloadTime,
		f.Base.SpiderIsHistoricalMend,
		f.Base.SpiderIsMustDownload,
		"", "", "",
	}
	if f.Method() == "POST" {
		switch f.GetString("step") {
		case "Step1": //publishtime
			ptime := []interface{}{
				f.Step1.DateFormat,
				f.Step1.Address,
				f.Step1.ContentChooser,
			}
			if script {
				_, scripts := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal, 1)
				f.ServeJson(scripts)
				return
			}
			rs, err := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal)
			if err == nil {
				f.ServeJson(rs)
			}
		case "Step2": //list
			if imodal == 2 { //chromedp测试
				//基础动作
				if f.Step2.Chrome != "" {
					chromeActions := []sp.ChromeActions{}
					rangeChromeActions := []sp.ChromeActions{}
					var result []interface{}
					//格式化基础动作
					if json.Unmarshal([]byte(f.Step2.Chrome), &chromeActions) != nil {
						f.ServeJson("chrome基础动作格式化错误")
						return
					}
					//格式化循环动作
					if f.Step2.RangeChrome != "" {
						if json.Unmarshal([]byte(f.Step2.RangeChrome), &rangeChromeActions) != nil {
							f.ServeJson("chrome循环动作格式化错误")
							return
						}
					}
					//拼装基础和循环动作
					if len(rangeChromeActions) > 0 && f.Step2.RangeTimes > 0 {
						for times := 1; times <= f.Step2.RangeTimes; times++ {
							chromeActions = append(chromeActions, rangeChromeActions...)
						}
					}
					chromeTask := sp.ChromeTask{
						TimeOut: 120, //测试默认两分钟
						Actions: chromeActions,
					}
					result = spider.DownloadByChrome(downloadnode, "", chromeTask, 150)
					resultMap := map[int]interface{}{}
					for i, r := range result {
						resultMap[i] = r
					}
					f.ServeJson(resultMap)
				} else {
					f.ServeJson("未进行动作编排")
				}
			} else {
				addrs := strings.Split(f.Step2.Listadds, "\n")
				if len(addrs) > 0 {
					for k, v := range addrs {
						addrs[k] = "'" + v + "'"
					}
					f.Step2.Listadds = strings.Join(addrs, ",")
				} else if len(f.Step2.Listadds) > 5 {
					f.Step2.Listadds = "'" + f.Step2.Listadds + "'"
				} else {
					f.Step2.Listadds = ""
				}
				list := []interface{}{
					f.Step2.Listadd,
					f.Step2.Listadds,
					f.Step2.BlockChooser,
					f.Step2.AddressChooser,
					f.Step2.TitleChooser,
					f.Step2.DateChooser,
					f.Step2.DateFormat,
				}
				listcheck = strings.Replace(listcheck, "\n", "\\n", -1)
				listcheck = strings.Replace(listcheck, "\"", "\\\"", -1)
				s_model := f.GetString("model")
				configModel := sp.Config.Model[s_model]
				model := map[string]interface{}{}
				for k, _ := range configModel {
					model[k] = f.GetString(k)
				}
				if script {
					_, script := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal, 1)
					f.ServeJson(script)
					return
				}
				rs, err := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal)
				if err == nil {
					f.ServeJson(rs)
				} else if err.(error).Error() == "no" {
					f.ServeJson(rs[0])
				}
			}
		case "Step3": //detail
			if imodal == 2 { //chromedp测试
				chromeActions := []sp.ChromeActions{}
				var result []interface{}
				if json.Unmarshal([]byte(f.Step3.Chrome), &chromeActions) == nil {
					chromeTask := sp.ChromeTask{
						TimeOut: 120, //测试默认两分钟
						Actions: chromeActions,
					}
					result = spider.DownloadByChrome(downloadnode, "", chromeTask, 150)
				} else {
					result = append(result, "chrome task json 格式化错误")
				}
				resultMap := map[int]interface{}{}
				for i, r := range result {
					resultMap[i] = r
				}
				f.ServeJson(resultMap)
			} else {
				content := []interface{}{
					f.Step3.ContentChooser,
					f.Step3.ElementChooser,
				}
				contentcheck = strings.Replace(contentcheck, "\n", "\\n", -1)
				contentcheck = strings.Replace(contentcheck, "\"", "\\\"", -1)
				data := map[string]interface{}{}
				data["title"] = f.Step3.T_title
				data["href"] = f.Step3.T_href
				data["publishtime"] = f.Step3.T_date
				if script {
					_, script := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal, 1)
					f.ServeJson(script)
					return
				}
				rs, err := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal)
				if projectinfo, ok := rs["projectinfo"].(map[string]interface{}); ok && projectinfo != nil {
					if attachments, ok := projectinfo["attachments"].(map[string]interface{}); ok && attachments != nil {
						for _, tmp := range attachments {
							tmpMap := tmp.(map[string]interface{})
							if qu.ObjToString(tmpMap["filename"]) == "附件中含有乱码" {
								rs["msg"] = "附件中含有乱码"
							}
						}
					}
				}
				if err == nil {
					f.ServeJson(rs)
				} else {
					f.ServeJson(rs["no"])
				}
			}
		}
	}
}

// 爬虫测试数据json
func (f *Front) GetJson() {
	code := f.GetString("code")
	username := f.GetSession("username").(string)
	if tr := TestResultMap[username+code]; tr != nil {
		task, _ := u.MgoEB.FindOne("task", map[string]interface{}{"code": code, "i_state": 3})
		comeintime := int64(0)
		if len(*task) > 0 {
			comeintime = qu.Int64All((*task)["l_comeintime"])
			tr.task_remark = "审核任务创建时间:" + qu.FormatDateByInt64(&comeintime, qu.Date_Short_Layout) + ";" + tr.task_remark
		}
		data := tr.dataInfo
		result := tr.listInfo
		if len(data) > 0 {
			data["contenthtml"] = ""
		}
		num := 0
		list_fir := []map[string]interface{}{}
		list_sec := []map[string]interface{}{}
		for page, list := range result {
			for k, v := range list {
				v["a_index"] = k + 1
				num++
			}
			if page == 1 {
				list_fir = list
			} else if page == 2 {
				list_sec = list
			}
		}
		f.T["list_fir"] = list_fir
		f.T["list_sec"] = list_sec
		f.T["data"] = data
		f.T["num"] = num
		f.T["descript"] = tr.task_descript
		f.T["remark"] = tr.task_remark
		f.T["rateremark"] = tr.task_rateremark
		f.T["reason"] = tr.reason
		f.T["msg"] = tr.msg
		f.T["comeintime"] = comeintime
		delete(TestResultMap, username+code)
	}
	f.Render("jsonInfo.html", &f.T)
}

var TestResultMap = map[string]*TestResult{} //username+code

// 某个爬虫整体测试结果
type TestResult struct {
	task_remark     string
	task_rateremark []string
	task_descript   string
	reason          string
	msg             string
	listInfo        map[int64][]map[string]interface{}
	dataInfo        map[string]interface{}
}

// 整体测试
func (f *Front) SpiderPass() {
	defer mu.Catch()
	tr := &TestResult{}
	result := map[int64][]map[string]interface{}{}
	data := map[string]interface{}{}
	msgArr := []string{}
	code := f.GetString("code")
	downloadnode := f.GetString("node")
	//根据code查询待确认任务
	query := map[string]interface{}{
		"s_code":  code,
		"i_state": 3,
	}
	task, _ := u.MgoEB.FindOne("task", query)
	descript := "null"
	remark := "null"
	remarktmp := []string{}
	rateremarktmp := []string{}
	if len(*task) > 0 {
		descript = (*task)["s_descript"].(string)
		if mrecord, ok := (*task)["a_mrecord"].([]interface{}); ok {
			for _, m := range mrecord {
				remarkInfo := m.(map[string]interface{})
				if remark := qu.ObjToString(remarkInfo["s_mrecord_remark"]); remark != "" {
					remarktmp = append(remarktmp, remark+";")
				}
				if rateremark := qu.ObjToString(remarkInfo["s_mrecord_rateremark"]); rateremark != "" {
					rateremarktmp = append(rateremarktmp, rateremark+";")
				}
			}
		}
	}
	if len(remarktmp) > 0 {
		remark = ""
		remark = strings.Join(remarktmp, "")
	}
	tr.task_remark = remark
	tr.task_rateremark = rateremarktmp
	tr.task_descript = descript
	//基本信息、方法一(发布时间)、方法二(列表页)、方法三(详情页)、总请求次数、go方法一、go方法二、go方法三、列表页条数
	steps := []interface{}{false, false, false, false, 0, 0, 0, 0, 0}
	one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
	reason, _ := (*one)["reason"].(string)
	tr.reason = reason
	if len(*one) > 0 && (*one)["oldlua"] == nil {
		common := (*one)["param_common"].([]interface{})
		if len(common) < 13 {
			f.ServeJson(steps)
			return
		} else {
			steps[0] = true
		}
	} else {
		steps[0] = true
	}
	script, liststr, contentstr := "", "", ""
	if (*one)["oldlua"] == nil {
		script, liststr, contentstr = spider.GetScriptByCode(code)
	} else {
		script = (*one)["luacontent"].(string)
	}
	if liststr != "" && contentstr != "" {
		msgArr = u.SpiderPassCheckLua(liststr, contentstr, (*one)) //校验
	}
	s := spider.CreateSpider(downloadnode, script)
	s.SpiderMaxPage = 2 //采集列表页总页数
	s.Timeout = 60
	timestr, timeerr := s.GetLastPublishTime()
	if timeerr == nil && len(timestr) > 4 {
		steps[1] = true //发布时间获取成功
		downloadNum := 0
		result, downloadNum, _ = s.DownListPageItem() //列表页采集结果
		if downloadNum == 0 {
			f.ServeJson(steps)
			return
		}
		steps[2] = true        //列表页获取成功
		steps[8] = downloadNum //下载量
		tr.listInfo = result
		if s.DownDetail {
			onePageList := result[1] //第一页数据
			if onePageDataNum := len(onePageList); onePageDataNum > 0 {
				index := onePageDataNum / 2 //取一条数据下载三级页
				param := map[string]string{}
				for k, v := range onePageList[index] {
					param[k] = qu.ObjToString(v)
				}
				data = map[string]interface{}{}
				s.DownloadDetailPage(param, data)
				tr.dataInfo = data
				if len(data) == 0 || qu.ObjToString(data["detail"]) == "" {
					steps[3] = false //详情页获取失败
				} else {
					steps[3] = true //详情页获取成功
				}
			}
		} else {
			steps[3] = true //详情页获取成功
		}
		//list, _ = s.DownListPageItem()
		//for _, l := range list {
qu.ObjToString(l["publishtime"]); publishtime == "0" || publishtime == "" { // msgArr = append(msgArr, "列表页publishtime取值异常") // break // } else { // t, err := time.ParseInLocation(qu.Date_Full_Layout, publishtime, time.Local) // if err != nil || t.Unix() <= 0 { // msgArr = append(msgArr, "列表页publishtime取值异常") // break // } // } //} //if len(list) > 0 { // tr.listInfo = list // listone := list[0] // if len(qu.ObjToString(listone["href"])) < 7 || // (qu.ObjToString(listone["publishtime"]) != "0" && len(qu.ObjToString(listone["publishtime"])) < 5) || // len(qu.ObjToString(listone["title"])) < 3 { // f.ServeJson(steps) // return // } else { // steps[2] = true // if s.DownDetail { // param := map[string]string{} // index := 0 // if len(list) > 0 { // steps[8] = len(list) // index = len(list) / 2 // for k, v := range list[index] { // param[k] = qu.ObjToString(v) // } // data = map[string]interface{}{} // s.DownloadDetailPage(param, data) // if len(data) > 0 { // tr.dataInfo = data // } // if len(data) == 0 || data["detail"].(string) == "" { // steps[3] = false // } else { // steps[3] = true // } // } // } else { // steps[3] = true // } // } //} } //关闭laustate s.L.Close() steps[4] = s.Test_luareqcount steps[5] = s.Test_goreqtime steps[6] = s.Test_goreqlist steps[7] = s.Test_goreqcon //校验 param_common, _ := (*one)["param_common"].([]interface{}) if qu.IntAll(param_common[5]) == 1 && len(result) > 1 { //页码为1,但能翻页 msgArr = append(msgArr, "爬虫可以翻页,最大页为1是否合适") } msg := u.SpiderPassCheckListAndDetail(result, data) msgArr = append(msgArr, msg...) username := f.GetSession("username").(string) tr.msg = strings.Join(msgArr, ";") TestResultMap[username+code] = tr f.ServeJson(steps) } func (f *Front) DownSpider(code string) { auth := qu.IntAll(f.GetSession("auth")) user := f.GetSession("loginuser") success := false script := "" if auth > u.Role_Dev { success = true //one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code}) one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code}) createuserid := qu.ObjToString((*one)["createuserid"]) filename := code + ".lua" if len(*one) > 0 { if (*one)["oldlua"] != nil { if (*one)["luacontent"] != nil { script = (*one)["luacontent"].(string) } } else { user, _ := u.MgoEB.FindById("user", createuserid, nil) name := (*one)["createuser"] email := (*user)["s_email"] upload := time.Now().Format("2006-01-02 15:04:05") script, _, _ = spider.GetScriptByCode(code, name, email, upload) } } f.ResponseWriter.Header().Del("Content-Type") f.ResponseWriter.Header().Add("Content-Type", "application/x-download") f.ResponseWriter.Header().Add("Content-Disposition", "attachment;filename=spider_"+filename) f.WriteBytes([]byte(script)) } else { f.Write("您没有权限") } //记录日志 downlogs := map[string]interface{}{ "code": code, "user": user, "auth": auth, "time": time.Now().Unix(), "success": success, "script": script, } u.MgoEB.Save("luadownlogs", downlogs) } //下架删除download数据 //func delDownloadData(code string) bool { // return mgu.Del("download", "spider", "spider", `{"code":"`+code+`"}`) //} //批量作废删除download数据 //func disableDelDownloadData(code []string) { // for _, v := range code { // flag := delDownloadData(v) // log.Println(code, "---批量删除download数据:", flag) // } //} // 爬虫核对 func (f *Front) Checktime() { code := f.GetString("code") auth := qu.IntAll(f.GetSession("auth")) if auth != u.Role_Admin { f.ServeJson(false) } else { //b := u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{ // 
"l_checktime": time.Now().Unix(), //}}, true, false) b := u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{ "l_checktime": time.Now().Unix(), }}, true, false) f.ServeJson(b) } } // 批量作废 func (f *Front) Disables() error { auth := qu.IntAll(f.GetSession("auth")) events := strings.Split(f.GetString("events"), ",") codes := strings.Split(f.GetString("codes"), ",") disablereason := f.GetString("disablereason") state, _ := f.GetInteger("state") res := "" if auth == u.Role_Admin { for k, code := range codes { //更新爬虫状态时爬虫下架 upresult, err := spider.UpdateSpiderByCodeState(code, "6", qu.IntAll(events[k])) //下架 qu.Debug("下架:", code, upresult) if upresult && err == nil { //下架成功 //更新爬虫 u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"state": state, "disablereason": disablereason}}, false, false) //修改任务状态 u.MgoEB.Update("task", map[string]interface{}{ "s_code": code, "i_state": map[string]interface{}{ "$nin": []int{4, 6}, //更新除审核通过和已关闭的任务 }, }, map[string]interface{}{"$set": map[string]interface{}{"i_state": 6, "l_updatetime": time.Now().Unix()}}, false, true) if err != nil { res = res + code + ", OK" + qu.ObjToString(err.Error()) + ";" } else { res = res + code + ", OK" + ";" } } else { res = res + code + ", 更新失败;" } } } else { res = "没有权限" } f.ServeJson(res) return nil } // 批量上下架 func (f *Front) BatchShelves() { codes := strings.Split(f.GetString("codes"), ",") state, _ := f.GetInteger("state") auth := qu.IntAll(f.GetSession("auth")) errCode := []string{} var err error b := false if IsHasUpState(auth, Sp_state_5) { if state == 5 { //批量上架 for _, code := range codes { if _, ok := u.CodeTimeCache.Get(code); ok { //避免短时间内重复上架 errCode = append(errCode, code+"重复上架") } else { u.CodeTimeCache.Set(code, "", time.Minute*2) //上架时,内存记录上架爬虫 _, err = UpStateAndUpSpider(code, "", "", "", Sp_state_5) if err != nil { errCode = append(errCode, code) } else { u.CodeTimeCache.Del(code) //上架成功后,删除内存记录 } } } } else { //批量下架 for _, code := range codes { b, err = UpStateAndUpSpider(code, "", "", "", Sp_state_6) if !b || err != nil { errCode = append(errCode, code) } //下架删除download数据 //if b { // flag := delDownloadData(code) // log.Println(code, "---删除download数据:", flag) //} } } } else { errCode = append(errCode, "没有权限") } f.ServeJson(errCode) } // 更新爬虫状态 func (f *Front) UpState() error { username := f.GetSession("username").(string) code := f.GetString("code") state, _ := f.GetInt("state") id := f.GetString("taskId") reason := f.GetString("reason") auth := qu.IntAll(f.GetSession("auth")) var codeArr = []string{code} var taskid []string //修改任务状态 istotask := false res := map[string]interface{}{ "istotask": istotask, "err": "没有权限", "code": sp.Se.Encode2Hex(code), "taskid": taskid, } var xgTime int64 if f.GetSession(id) == nil || f.GetSession(id) == "" { xgTime = time.Now().Unix() } else { xgTimeStr := qu.ObjToString(f.GetSession(id)) xgTimeTmp, _ := time.ParseInLocation("2006-01-02 15:04:05", xgTimeStr, time.Local) xgTime = xgTimeTmp.Unix() } f.DelSession(id) if IsHasUpState(auth, int(state)) { b, err := UpStateAndUpSpider(code, "", reason, username, int(state)) //更新爬虫状态 if b && state == Sp_state_1 { //提交审核 //有对应任务跳转提交记录页 taskid = CheckTask(codeArr, 1) if len(taskid) > 0 { res["istotask"] = true res["taskid"] = taskid[0] } } else if b && state == Sp_state_2 { //打回 taskid = CheckTask(codeArr, 2) if len(taskid) > 0 { //UpTaskState([]string{taskid}, 2) //修改状态 UpTaskState(taskid, 2, "", 
				SaveRemark(taskid, reason, username) //保存记录信息
			}
		} else if b && state == Sp_state_3 { //审核通过
			taskid = CheckTask(codeArr, 3)
			if len(taskid) > 0 {
				//UpTaskState([]string{taskid}, 3)
				UpTaskState(taskid, 3, "", int64(0))
				SaveRemark(taskid, "", username)
			}
		} else if b && state == Sp_state_6 { //下架
			//下架成功删除心跳数据
			flag := DelSpiderHeart(code)
			log.Println(code, "---下架删除download数据:", flag)
		} else if b && state == Sp_state_7 { //反馈
			taskid = CheckTask(codeArr, 7)
			if len(taskid) > 0 {
				UpTaskState(taskid, 7, reason, xgTime)
			}
		}
		if err != nil {
			res["err"] = err.Error()
			f.ServeJson(res)
		} else {
			res["err"] = ""
			f.ServeJson(res)
		}
	} else {
		f.ServeJson(res)
	}
	return nil
}

// Assort 批量下架爬虫并更新状态、关闭任务、删除心跳
func (f *Front) Assort() {
	state, _ := f.GetInteger("state")
	code := f.GetString("code")
	codes := u.SymbolReg.Split(code, -1)
	success := true
	msg := ""
	for _, code := range codes {
		query := map[string]interface{}{
			"code": code,
		}
		//下架爬虫
		//lua, _ := u.MgoE.FindOne("luaconfig", query)
		lua, _ := u.MgoEB.FindOne("luaconfig", query)
		event := qu.IntAll((*lua)["event"])
		if (*lua)["downevent"] != nil { //爬虫开发修改爬虫节点,审核人员分类爬虫时,原来爬虫所在节点下架
			event = qu.IntAll((*lua)["downevent"])
		}
		upresult, err := spider.UpdateSpiderByCodeState(code, "6", event)
		qu.Debug("下架爬虫:", code, upresult, err)
		if upresult && err == nil {
			//更新爬虫状态
			update := map[string]interface{}{
				"$set": map[string]interface{}{
					"state": state,
					//"modifytime": time.Now().Unix(),
					"l_uploadtime": time.Now().Unix(),
				},
			}
			//u.MgoE.Update("luaconfig", query, update, false, false)
			u.MgoEB.Update("luaconfig", query, update, false, false)
			//关闭任务
			query = map[string]interface{}{
				"s_code": code,
			}
			update = map[string]interface{}{
				"$set": map[string]interface{}{
					"i_state": 6,
				},
			}
			u.MgoEB.Update("task", query, update, false, true)
			//删除心跳
			DelSpiderHeart(code)
		} else {
			success = false
			msg += code + ";"
		}
	}
	f.ServeJson(map[string]interface{}{"success": success, "msg": msg})
}

// 更新爬虫状态,并判断是否更新节点爬虫
func UpStateAndUpSpider(code, id, reason, username string, state int) (bool, error) {
	upresult := false
	var err error
	one := &map[string]interface{}{}
	if code != "" {
		//one, _ = u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
		one, _ = u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
	} else {
		//one, _ = u.MgoE.FindById("luaconfig", id, nil)
		one, _ = u.MgoEB.FindById("luaconfig", id, nil)
		code = qu.ObjToString((*one)["code"])
	}
	if len(*one) > 0 {
		var event int
		unset := map[string]interface{}{}
		if (*one)["event"] != nil {
			event = qu.IntAll((*one)["event"])
		} else {
			for k, _ := range sp.Config.Uploadevents { //
				event = qu.IntAll(k)
				break
			}
			//r := rand.New(rand.NewSource(time.Now().UnixNano()))
			//event = sp.Config.Uploadevents[r.Intn(len(sp.Config.Uploadevents))]
		}
		//oldstate := qu.IntAll(one["state"])
		switch state {
		case Sp_state_4, Sp_state_6: //作废、下架
			// if oldstate == Sp_state_5 {
			// 	upresult = false
			// 	err = errors.New("已上架不允许作废")
			// } else {
			// 	upresult = true
			// }
			upresult, err = spider.UpdateSpiderByCodeState(code, fmt.Sprint(state), event) //下架
			qu.Debug("下架:", upresult, code)
		case Sp_state_5: //上架(爬虫端在更新上架的时候为了更新内存中字段,采用先下架上架)
			if downevent := qu.IntAll((*one)["downevent"]); downevent != 0 { //爬虫开发修改爬虫节点,审核人员上架爬虫时,原来爬虫所在节点下架
				upresult, err = spider.UpdateSpiderByCodeState(code, "6", downevent)
				qu.Debug(code, "下架历史节点:", downevent)
				if upresult && err == nil {
					unset = map[string]interface{}{"downevent": ""}
				}
			} else {
				upresult, err = spider.UpdateSpiderByCodeState(code, "6", event)
			}
			qu.Debug("下架:", upresult, code, event)
			if upresult && err == nil {
				upresult, err = spider.UpdateSpiderByCodeState(code, fmt.Sprint(state), event)
				qu.Debug("上架:", upresult, code, event)
			}
		case Sp_state_3: //审核通过
			//校验爬虫三级页是否有附件下载方法
			str_content := qu.ObjToString((*one)["str_content"])
			if !strings.Contains(str_content, "downloadFile") && !strings.Contains(str_content, "getFileAttachmentsArrayWithTag") {
				err = errors.New("三级页缺少下载附件方法")
			} else {
				upresult = true
			}
		default:
			upresult = true
			err = nil
		}
		if err != nil && strings.Contains(err.Error(), "timeout") {
			err = errors.New("连接节点" + fmt.Sprint(event) + "超时")
			upresult = true
		}
		if upresult && err == nil {
			upset := map[string]interface{}{"state": state} //修改状态
			if (*one)["oldlua"] != nil { //老脚本上传
				//upresult = u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, true, false)
				up := map[string]interface{}{
					"$set": upset,
				}
				if len(unset) > 0 {
					up["$unset"] = unset
				}
				upresult = u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, up, true, false)
			} else {
				if state == Sp_state_1 { //提交审核
					upset["l_complete"] = time.Now().Unix()
					upset["report"] = ""
					UpdateSiteTask(code) //更新重点站点任务
				} else if state == Sp_state_7 { //反馈问题
					upset["report"] = reason
					upset["state"] = 1 //反馈后爬虫改为待审核
					upset["l_complete"] = time.Now().Unix()
				} else if state == Sp_state_3 { //审核通过
					if (*one)["event"] == nil {
						upset["event"] = event
						//upset["modifytime"] = time.Now().Unix()
					}
					upset["frequencyerrtimes"] = 0 //爬虫审核通过,重置采集频率异常次数
					upset["l_uploadtime"] = time.Now().Unix()
				} else if state == Sp_state_2 { //打回原因
					upset["reason"] = reason
					//清理机检结果
					unset["check"] = ""
					unset["checkok"] = ""
					unset["checktime"] = ""
					LuaCheckSaveLog(one) //保存机检日志
				} else if state == Sp_state_5 { //上架
					upset["l_checktime"] = 0 //核对时间重置
					//清理机检结果
					unset["check"] = ""
					unset["checkok"] = ""
					unset["checktime"] = ""
					LuaCheckSaveLog(one) //保存机检日志
					LuaSaveLog(code, username, one, 1)
					go ModifyLogs_SaveCodeLogs(code, *one)
				} else if state == Sp_state_4 { //作废,作废原因
					upset["disablereason"] = reason
					//upset["modifytime"] = time.Now().Unix()
					upset["l_uploadtime"] = time.Now().Unix() //l_complete爬虫完成时间
				}
				up := map[string]interface{}{
					"$set": upset,
				}
				if len(unset) > 0 {
					up["$unset"] = unset
				}
				//upresult = u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, false, false)
				upresult = u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, up, false, false)
				qu.Debug("提交日志:", code, upset, upresult)
				if upresult && (state == Sp_state_2 || state == Sp_state_3) { //打回、审核记录日志
					types := "打回"
					if state == Sp_state_3 {
						types = "审核"
					}
					event := qu.IntAll((*one)["event"])
					obj := map[string]interface{}{
						"code":       code,
						"auditor":    username,
						"types":      types,
						"comeintime": time.Now().Unix(),
						"reason":     reason,
						"spideruser": (*one)["createuser"],
						"modifytime": (*one)["modifytime"],
						"event":      event,
						"site":       (*one)["site"],
						"channel":    (*one)["channel"],
					}
					if !strings.HasSuffix(code, u.Bu) { //凡是以_bu结尾的爬虫一律不计入审核记录
						//新爬虫审核记录表
						if event == 7000 && (state == Sp_state_3 || state == Sp_state_2) {
							count := u.MgoEB.Count("lua_logs_auditor", map[string]interface{}{"code": code, "types": "审核"})
							if count == 0 { //新爬虫审核记录
								u.MgoEB.Save("lua_logs_auditor_new", obj)
							}
						}
						u.MgoEB.Save("lua_logs_auditor", obj) //历史维护爬虫审核记录
					}
				}
			}
		}
	}
	return upresult, err
}

// 保存记录信息
func SaveRemark(taskid []string, reason, username string) {
	timeNow := time.Now().Unix()
	if reason == "" {
		reason = "审核通过"
	}
	for _, id := range taskid {
		task, _ := u.MgoEB.FindById("task", id, nil)
		if task != nil && len(*task) > 0 {
			checkData := (*task)["a_check"]
			var checkArr []map[string]interface{}
			newData := make(map[string]interface{})
			newData["s_check_checkUser"] = username
			newData["l_check_checkTime"] = timeNow
			newData["s_check_checkRemark"] = reason
			if checkData != nil {
				myArr := qu.ObjArrToMapArr(checkData.([]interface{}))
				if myArr != nil && len(myArr) > 0 {
					for _, v := range myArr {
						checkArr = append(checkArr, v)
					}
				}
			}
			checkArr = append(checkArr, newData)
			(*task)["a_check"] = checkArr
			u.MgoEB.UpdateById("task", id, map[string]interface{}{"$set": &task})
		}
	}
}

// 修改任务状态
func UpTaskState(code []string, num int, reason string, startTime int64) {
	query := map[string]interface{}{}
	update := map[string]interface{}{}
	for _, v := range code {
		if num == 1 || num == 2 || num == 3 || num == 7 { //id
			query = map[string]interface{}{
				"_id": mongodb.StringTOBsonId(v),
			}
		} else {
			query = map[string]interface{}{ //code
				"s_code": v,
			}
		}
		if num == 1 { //提交审核
			update = map[string]interface{}{
				"$set": map[string]interface{}{
					"i_state": 3,
				},
			}
		} else if num == 2 { //打回 -->未通过
			update = map[string]interface{}{
				"$set": map[string]interface{}{
					"i_state":      5,
					"l_updatetime": time.Now().Unix(),
				},
			}
		} else if num == 3 { //发布(审核通过) -->审核通过
			update = map[string]interface{}{
				"$set": map[string]interface{}{
					"i_state":      4,
					"l_updatetime": time.Now().Unix(),
					"l_uploadtime": time.Now().Unix(),
				},
			}
		} else if num == 4 { //批量作废 -->关闭
			update = map[string]interface{}{
				"$set": map[string]interface{}{
					"i_state":      6,
					"l_complete":   time.Now().Unix(),
					"l_updatetime": time.Now().Unix(),
				},
			}
		} else if num == 7 { //反馈信息 -->待审核
			newData := map[string]interface{}{
				"l_mrecord_comeintime": startTime,
				"l_mrecord_complete":   time.Now().Unix(),
				"s_mrecord_remark":     reason,
			}
			mrecord := []interface{}{}
			mrecord = append(mrecord, newData)
			update = map[string]interface{}{
				"$set": map[string]interface{}{
					"i_state":      3,
					"l_complete":   time.Now().Unix(),
					"a_mrecord":    mrecord,
					"l_updatetime": time.Now().Unix(),
				},
			}
		}
		flag := u.MgoEB.Update("task", query, update, false, true)
		log.Println("codeOrId:", query, " 修改任务状态:", flag)
	}
}

// 更新节点
func (f *Front) ChangeEvent() {
	auth := qu.IntAll(f.GetSession("auth"))
	if auth != u.Role_Admin {
		f.ServeJson("没有权限")
		return
	}
	code := f.GetString("code")
	event, _ := f.GetInt("event")
	eventok := false
	for k, _ := range sp.Config.Uploadevents {
		if event == qu.Int64All(k) {
			eventok = true
			break
		}
	}
	if !eventok {
		f.ServeJson("没有对应节点")
		return
	}
	//info, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
	info, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
	if len(*info) > 0 {
		oldevent := qu.IntAll((*info)["event"])
		if qu.IntAll((*info)["state"]) == Sp_state_5 { //源节点下架
			_, err := spider.UpdateSpiderByCodeState(code, fmt.Sprint(Sp_state_6), oldevent)
			set := map[string]interface{}{
				"$set": map[string]interface{}{
					"event": qu.IntAll(event),
					"state": Sp_state_6,
				},
			}
			//u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
			u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
			if err != nil && strings.Contains(err.Error(), "timeout") {
				f.ServeJson("连接节点" + fmt.Sprint(oldevent) + "超时")
			} else {
				f.ServeJson(err.Error())
			}
		} else {
			set := map[string]interface{}{
				"$set": map[string]interface{}{
					"event": qu.IntAll(event),
				},
			}
			//u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
			u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
		}
	} else {
		f.ServeJson("没有对应记录")
	}
}

// 验证用户是否有更改状态权限
func IsHasUpState(auth, state int) bool {
	rep := false
	switch auth {
	case u.Role_Dev:
		if state == Sp_state_1 || state == Sp_state_7 {
			rep = true
		}
	case u.Role_Examine:
		if state == Sp_state_2 || state == Sp_state_3 {
			rep = true
		}
	case u.Role_Admin:
		rep = true
	default:
	}
	return rep
}

var list_fields = `{"_id":1,"code":1,"createuser":1,"modifyuser":1,"modifytime":1,"l_uploadtime":1,"l_checktime":1,"state":1,"param_common":1,"event":1,"urgency":1,"platform":1,"pendstate":1}`

// 脚本管理,结合爬虫运行信息
func (f *Front) LuaList() {
	auth := qu.IntAll(f.GetSession("auth"))
	if auth != u.Role_Admin {
		f.ServeJson("没有权限!")
		return
	}
	if f.Method() == "POST" {
		state, _ := f.GetInteger("state")
		event, _ := f.GetInteger("event")
		start, _ := f.GetInteger("start")
		limit, _ := f.GetInteger("length")
		draw, _ := f.GetInteger("draw")
		searchStr := f.GetString("search[value]")
		//search := strings.Replace(searchStr, " ", "", -1)
		search := strings.TrimSpace(searchStr)
		platform := f.GetString("platform")
		query := map[string]interface{}{}
		queryArr := []interface{}{}
		//搜索条件
		if search != "" {
			q1 := map[string]interface{}{}
			q1["$or"] = []interface{}{
				map[string]interface{}{"code": map[string]interface{}{"$regex": search}},
				map[string]interface{}{"createuser": map[string]interface{}{"$regex": search}},
				map[string]interface{}{"param_common.1": map[string]interface{}{"$regex": search}},
			}
			queryArr = append(queryArr, q1)
		}
		//爬虫状态
		q2 := map[string]interface{}{}
		if state > -1 {
			q2 = map[string]interface{}{"state": state}
		} else {
			q2 = map[string]interface{}{
				"state": map[string]interface{}{
					"$in": []int{Sp_state_3, Sp_state_5, Sp_state_6},
				},
			}
		}
		queryArr = append(queryArr, q2)
		//爬虫节点
		q3 := map[string]interface{}{}
		if event > -1 {
			q3 = map[string]interface{}{"event": event}
			queryArr = append(queryArr, q3)
		}
		//爬虫平台
		q4 := map[string]interface{}{}
		if platform != "-1" {
			q4 = map[string]interface{}{"platform": platform}
			queryArr = append(queryArr, q4)
		}
		query["$and"] = queryArr
		sort := `{"%s":%d}`
		orderIndex := f.GetString("order[0][column]")
		orderName := f.GetString(fmt.Sprintf("columns[%s][data]", orderIndex))
		orderType := 1
		if f.GetString("order[0][dir]") != "asc" {
			orderType = -1
		}
		sort = fmt.Sprintf(sort, orderName, orderType)
		page := start / 10
		//luas, _ := u.MgoE.Find("luaconfig", query, sort, list_fields, false, start, limit)
		//count := u.MgoE.Count("luaconfig", query)
		luas, _ := u.MgoEB.Find("luaconfig", query, sort, list_fields, false, start, limit)
		count := u.MgoEB.Count("luaconfig", query)
		qu.Debug("query:", query, start, limit, count, len(*luas))
		for k, v := range *luas {
			v["num"] = k + 1 + page*10
			l_uploadtime := qu.Int64All(v["l_uploadtime"])
			v["l_uploadtime"] = qu.FormatDateByInt64(&l_uploadtime, qu.Date_Full_Layout)
			l_checktime := qu.Int64All(v["l_checktime"])
			v["l_checktime"] = qu.FormatDateByInt64(&l_checktime, qu.Date_Full_Layout)
			if l_checktime > 0 { //核对
				v["is_check"] = true
			} else { //未核对
				v["is_check"] = false
			}
			if tmp, ok := spinfos.Load(v["code"]); ok {
				info := tmp.(*spinfo)
				v["modifytime"] = info.lastHeartbeat
				v["yesterday"] = fmt.Sprint(info.yesterdayDowncount) + "/" + fmt.Sprint(info.yestoDayRequestNum)
				v["terday"] = fmt.Sprint(info.todayDowncount) + "/" + fmt.Sprint(info.toDayRequestNum)
				v["lastdowncount"] = info.lastDowncount
				v["lstate"] = info.lstate
			} else {
				v["modifytime"] = ""
				v["yesterday"] = ""
				v["terday"] = ""
				v["lastdowncount"] = 0
				v["lstate"] = ""
			}
		}
		f.ServeJson(map[string]interface{}{"draw": draw, "data": luas, "recordsFiltered": count, "recordsTotal": count})
	} else {
		events := []string{}
		for k, _ := range sp.Config.Uploadevents {
			events = append(events, k)
		}
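		//上传节点排序后传入模板渲染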
		sort.Strings(events)
		f.T["events"] = events
		f.Render("lualist.html", &f.T)
	}
}

// 查看是否有该任务
func CheckTask(codes []string, num int) []string {
	// var id string = ""
	query := map[string]interface{}{}
	var idArr []string
	if len(codes) > 0 {
		for _, v := range codes {
			if num == 1 {
				query = map[string]interface{}{
					"s_code": v,
					"i_state": map[string]interface{}{
						"$in": []int{1, 2, 5},
					},
				}
			} else if num == 2 { //打回时查询待审核的任务
				query = map[string]interface{}{
					"s_code":  v,
					"i_state": 3,
				}
			} else if num == 3 { //审核通过时查询待处理、处理中、待审核、未通过的任务
				query = map[string]interface{}{
					"s_code": v,
					"i_state": map[string]interface{}{
						"$in": []int{1, 2, 3, 5},
					},
				}
			} else if num == 7 {
				query = map[string]interface{}{
					"s_code": v,
					"i_state": map[string]interface{}{
						"$in": []int{2, 5},
					},
				}
			}
			task, _ := u.MgoEB.Find("task", query, nil, nil, false, -1, -1)
			if task != nil {
				for _, t := range *task {
					idArr = append(idArr, mongodb.BsonIdToSId(t["_id"]))
				}
			}
			return idArr
		}
	}
	return idArr
}