package spider

import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"sort"
	"strings"
	"sync/atomic"
	"time"

	mu "mfw/util"
	"qfw/util"
	lu "spiderutil"

	"github.com/go-xweb/xweb"
	"github.com/tealeg/xlsx"
)

// Front is the xweb action serving the spider control console. Each
// xweb.Mapper field carries the URL pattern of one handler (presumably routed
// by xweb to the exported method named after the field — confirm against the
// xweb version in use).
type Front struct {
	*xweb.Action
	login           xweb.Mapper `xweb:"/"`      // login
	loadIndex       xweb.Mapper `xweb:"/front"` // control center
	start           xweb.Mapper `xweb:"/front/start"`
	stop            xweb.Mapper `xweb:"/front/stop"`
	pass            xweb.Mapper `xweb:"/front/pass"`
	resume          xweb.Mapper `xweb:"/front/resume"`
	reloadlua       xweb.Mapper `xweb:"/front/reloadlua"`
	addSpider       xweb.Mapper `xweb:"/front/addSpider"`
	delSpider       xweb.Mapper `xweb:"/front/delSpider/([^.]*).html"`
	addJob          xweb.Mapper `xweb:"/front/addJob"`
	searchJob       xweb.Mapper `xweb:"/front/searchJob"`
	clearErr        xweb.Mapper `xweb:"/front/clearErr"`
	viewErrIndex    xweb.Mapper `xweb:"/front/viewErrIndex/([^.]*).html"`
	viewErrDetail   xweb.Mapper `xweb:"/front/viewErrDetail"`
	updateDownLimit xweb.Mapper `xweb:"/front/updateDownLimit"`
	fastStartOrStop xweb.Mapper `xweb:"/front/fastStartOrStop"`
	getscript       xweb.Mapper `xweb:"/front/getscript"`
	sendinfo        xweb.Mapper `xweb:"/front/sendinfo"`
	getAllSpider    xweb.Mapper `xweb:"/front/getAllSpider"`
}

// PageSize is the number of spiders returned per page of the spider list.
const (
	PageSize = 10
)

// clampPage normalises a requested page number against the number of items.
// Pages below 1 become 1, pages past the end become the last page. With zero
// items the last page is 0, so the returned page is 0 as well.
func clampPage(currPage, total int64) (int64, int64) {
	if currPage < 1 {
		currPage = 1
	}
	totalPage := (total + PageSize - 1) / PageSize
	if currPage > totalPage {
		currPage = totalPage
	}
	return currPage, totalPage
}

// spiderListPage builds the JSON payload shared by the list-refreshing
// handlers: the echoed query code, the clamped page number, the page count
// (computed over every registered spider, not only the filtered ones) and the
// spiders of the requested page.
func spiderListPage(qcode, state string, currPage int64) map[string]interface{} {
	var total int64
	Allspiders.Range(func(_, _ interface{}) bool {
		total++
		return true
	})
	page, totalPage := clampPage(currPage, total)
	return map[string]interface{}{
		"qcode":     qcode,
		"totalPage": totalPage,
		"currPage":  page,
		"spiders":   getSpiders(qcode, state, page),
	}
}

// Sendinfo calls the save-information interface: it loads the "data_bak"
// document named by the "id" request parameter, drops its "_id" field and
// sends it through Msclient as message type 4002. The JSON response is
// "send success" when the call succeeds and answers "true" (any letter case),
// and "send fail" in every other case.
func (i *Front) Sendinfo() error {
	const failed = "send fail"
	id := i.GetString("id")
	info, _ := MgoS.FindById("data_bak", id, nil)
	if info == nil {
		// Nothing to send; dereferencing a nil result would panic.
		log.Println("sendinfo: no document returned for id", id)
		i.ServeJson(failed)
		return nil
	}
	delete(*info, "_id")
	tmp, err := json.Marshal([]interface{}{"title", []interface{}{info}})
	if err != nil {
		log.Println("sendinfo: marshal payload:", err)
		i.ServeJson(failed)
		return nil
	}
	bs, err := Msclient.Call("", mu.UUID(8), 4002, mu.SENDTO_TYPE_ALL_RECIVER, tmp, 30)
	log.Println(string(bs))
	log.Println(err)
	reps := failed
	if err == nil && strings.EqualFold(string(bs), "true") {
		reps = "send success"
	}
	i.ServeJson(reps)
	return nil
}

// Getscript responds with the ScriptFile value of the spider registered under
// the "code" request parameter, or with an empty string when no such spider
// is loaded. The value is also written to the log.
func (i *Front) Getscript() error {
	code := i.GetString("code")
	script := ""
	if v, ok := Allspiders.Load(code); ok {
		script = v.(*Spider).ScriptFile
	}
	log.Println("----------------------------")
	log.Println(script)
	log.Println("----------------------------")
	i.ServeJson(script)
	return nil
}

// Login renders the login page when either credential is missing. Otherwise
// it looks the user up in "spider_admin" (password compared as MD5), stores a
// matching record in the session under "loginuser", and redirects to /front
// whether or not a record matched.
func (i *Front) Login() error {
	name := i.GetString("s_name")
	pass := i.GetString("s_pwd")
	if name == "" || pass == "" {
		i.T["login"] = false
		return i.Render("login.html", &i.T)
	}
	tmp, _ := MgoS.FindOne("spider_admin", map[string]interface{}{
		"loginname": name,
		"password":  util.GetMd5String(pass),
	})
	if tmp != nil && len(*tmp) > 0 {
		i.SetSession("loginuser", *tmp)
	}
	return i.Redirect("/front")
}

// LoadIndex renders the home page: totals aggregated over all loaded spiders,
// the queue length when lu.Config.Working is 1 (queue mode), and the requested
// page of the spider list filtered by "qcode" and "state".
func (i *Front) LoadIndex() error {
	qcode := i.GetString("qcode")
	currPage, _ := i.GetInt("currPage")
	state := i.GetString("state")

	var spRunTotal, todayDownTotal, yesdayDownTotal, errTotal, todayRequest, yestoDayRequest, allspidersLen int32
	Allspiders.Range(func(_, value interface{}) bool {
		v := value.(*Spider)
		errTotal += v.ErrorNum
		todayDownTotal += v.TodayDowncount
		yesdayDownTotal += v.YesterdayDowncount
		if !v.Stop && !v.Pass {
			spRunTotal++
		}
		todayRequest += v.ToDayRequestNum
		yestoDayRequest += v.YestoDayRequestNum
		allspidersLen++
		return true
	})
	currPage, totalPage := clampPage(currPage, int64(allspidersLen))

	i.T["spTotal"] = allspidersLen
	i.T["spRunTotal"] = spRunTotal
	i.T["todayDownTotal"] = todayDownTotal
	i.T["yesdayDownTotal"] = yesdayDownTotal
	i.T["errTotal"] = errTotal
	i.T["todayRequest"] = todayRequest
	i.T["yestoDayRequest"] = yestoDayRequest
	i.T["qcode"] = qcode
	i.T["totalPage"] = totalPage
	i.T["currPage"] = currPage
	if lu.Config.Working == 1 { // queue mode
		length := 0
		LoopListPath.Range(func(_, v interface{}) bool {
			if v != nil {
				length++
			}
			return true
		})
		i.T["listnum"] = length
	} else {
		i.T["listnum"] = ""
	}
	i.T["spiders"] = getSpiders(qcode, state, currPage)
	return i.Render("index.html", &i.T)
}

/*
// The handlers below are disabled; they are kept here for reference only.

// one-click start/stop
func (i *Front) FastStartOrStop() error {
	if lu.Config.Working == 1 {
		i.ServeJson("fail")
		return nil
	}
	flag := i.GetString("flag")
	Allspiders.Range(func(key, value interface{}) bool {
		v := value.(*Spider)
		lu.TimeSleepFunc(50*time.Millisecond, TimeSleepChan)
		switch flag {
		case "start":
			v.Stop = false
			v.StartJob()
		case "stop":
			v.Stop = true
			SaveDownCount(v.Code, false, v.TodayDowncount, v.ToDayRequestNum, v.YesterdayDowncount, v.YestoDayRequestNum)
		}
		return true
	})
	i.ServeJson("ok")
	return nil
}

// start spider
func (i *Front) Start() error {
	if lu.Config.Working == 1 {
		return nil
	}
	qcode := i.GetString("qcode")
	code := i.GetString("code")
	currPage, _ := i.GetInt("currPage")
	state := i.GetString("state")
	if currPage < 1 {
		currPage = 1
	}
	if value, ok := Allspiders.Load(code); ok {
		v := value.(*Spider)
		v.Stop = false
		v.StartJob()
	}
	res := make(map[string]interface{})
	res["qcode"] = qcode
	allspidersLen := 0
	Allspiders.Range(func(key, value interface{}) bool {
		allspidersLen += 1
		return true
	})
	totalPage := int64((allspidersLen + PageSize - 1) / PageSize)
	if currPage > totalPage {
		currPage = totalPage
	}
	res["totalPage"] = totalPage
	res["currPage"] = currPage
	res["spiders"] = getSpiders(qcode, state, currPage)
	i.ServeJson(&res)
	return nil
}

// stop spider
func (i *Front) Stop() error {
	if lu.Config.Working == 1 {
		return nil
	}
	code := i.GetString("code")
	qcode := i.GetString("qcode")
	currPage, _ := i.GetInt("currPage")
	state := i.GetString("state")
	if currPage < 1 {
		currPage = 1
	}
	if value, ok := Allspiders.Load(code); ok {
		v := value.(*Spider)
		v.Stop = true
		SaveDownCount(v.Code, false, v.TodayDowncount, v.ToDayRequestNum, v.YesterdayDowncount, v.YestoDayRequestNum)
	}
	res := make(map[string]interface{})
	res["qcode"] = qcode
	allspidersLen := 0
	Allspiders.Range(func(key, value interface{}) bool {
		allspidersLen += 1
		return true
	})
	totalPage := int64((allspidersLen + PageSize - 1) / PageSize)
	if currPage > totalPage {
		currPage = totalPage
	}
	res["totalPage"] = totalPage
	res["currPage"] = currPage
	res["spiders"] = getSpiders(qcode, state, currPage)
	i.ServeJson(&res)
	return nil
}

// pause spider
func (i *Front) Pass() error {
	if lu.Config.Working == 1 {
		return nil
	}
	code := i.GetString("code")
	qcode := i.GetString("qcode")
	currPage, _ := i.GetInt("currPage")
	state := i.GetString("state")
	if currPage < 1 {
		currPage = 1
	}
	if value, ok := Allspiders.Load(code); ok {
		v := value.(*Spider)
		v.Pass = true
	}
	res := make(map[string]interface{})
	res["qcode"] = qcode
	allspidersLen := 0
	Allspiders.Range(func(key, value interface{}) bool {
		allspidersLen += 1
		return true
	})
	totalPage := int64((allspidersLen + PageSize - 1) / PageSize)
	if currPage > totalPage {
		currPage = totalPage
	}
	res["totalPage"] = totalPage
	res["currPage"] = currPage
	res["spiders"] = getSpiders(qcode, state, currPage)
	i.ServeJson(&res)
	return nil
}

// resume spider
func (i *Front) Resume() error {
	if lu.Config.Working == 1 {
		return nil
	}
	code := i.GetString("code")
	qcode := i.GetString("qcode")
	currPage, _ := i.GetInt("currPage")
	state := i.GetString("state")
	if currPage < 1 {
		currPage = 1
	}
	if value, ok := Allspiders.Load(code); ok {
		v := value.(*Spider)
		v.Pass = false
	}
	res := make(map[string]interface{})
	res["qcode"] = qcode
	allspidersLen := 0
	Allspiders.Range(func(key, value interface{}) bool {
		allspidersLen += 1
		return true
	})
	totalPage := int64((allspidersLen + PageSize - 1) / PageSize)
	if currPage > totalPage {
		currPage = totalPage
	}
	res["totalPage"] = totalPage
	res["currPage"] = currPage
	res["spiders"] = getSpiders(qcode, state, currPage)
	i.ServeJson(&res)
	return nil
}

// reload lua script
func (i *Front) Reloadlua() error {
	if lu.Config.Working == 1 {
		i.ServeJson("排队模式不需要重载!")
		return nil
	}
	code := i.GetString("code")
	qcode := i.GetString("qcode")
	currPage, _ := i.GetInt("currPage")
	state := i.GetString("state")
	if currPage < 1 {
		currPage = 1
	}
	info := ""
	if value, ok := Allspiders.Load(code); ok {
		spider := value.(*Spider)
		spider.Reload()
		spider.LoadScript(code, spider.ScriptFile, false)
		spider.DownDetail = spider.GetBoolVar("spiderDownDetailPage")
		spider.Collection = spider.GetVar("spider2Collection")
		spider.SpiderRunRate = int64(spider.GetIntVar("spiderRunRate"))
		spider.StoreToMsgEvent = spider.GetIntVar("spiderStoreToMsgEvent")
		spider.StoreMode = spider.GetIntVar("spiderStoreMode")
		spider.CoverAttr = spider.GetVar("spiderCoverAttr")
		spiderSleepBase := spider.GetIntVar("spiderSleepBase")
		if spiderSleepBase == -1 {
			spider.SleepBase = 1000
		} else {
			spider.SleepBase = spiderSleepBase
		}
		spiderSleepRand := spider.GetIntVar("spiderSleepRand")
		if spiderSleepRand == -1 {
			spider.SleepRand = 1000
		} else {
			spider.SleepRand = spiderSleepRand
		}
		spiderTimeout := spider.GetIntVar("spiderTimeout")
		if spiderTimeout == -1 {
			spider.Timeout = 60
		} else {
			spider.Timeout = int64(spiderTimeout)
		}
		spider.TargetChannelUrl = spider.GetVar("spiderTargetChannelUrl")
		spider.UserName = spider.GetVar("spiderUserName")
		spider.UserEmail = spider.GetVar("spiderUserEmail")
		spider.UploadTime = spider.GetVar("spiderUploadTime")
		info = "已重新加载"
	}
	res := make(map[string]interface{})
	res["qcode"] = qcode
	allspidersLen := 0
	Allspiders.Range(func(key, value interface{}) bool {
		allspidersLen += 1
		return true
	})
	totalPage := int64((allspidersLen + PageSize - 1) / PageSize)
	if currPage > totalPage {
		currPage = totalPage
	}
	res["totalPage"] = totalPage
	res["currPage"] = currPage
	res["spiders"] = getSpiders(qcode, state, currPage)
	res["info"] = info
	i.ServeJson(&res)
	return nil
}

// add spider
func (i *Front) AddSpider() error {
	i.ServeJson("添加任务已作废!")
	return nil
	res := "添加任务失败,请输入正确的lua脚本路径!"
	filename := i.GetString("filename")
	f, err := os.Open("./" + filename)
	if err != nil {
		res = err.Error()
	} else {
		for _, v := range Allspiders {
			if v.Script.ScriptFile == filename {
				res = v.Script.ScriptFile + ",任务已经存在,请不要重复添加"
				i.ServeJson(&res)
				return nil
			}
		}
		if strings.Contains(f.Name(), "spider_") && strings.HasSuffix(f.Name(), ".lua") {
			sp := NewSpider(filename)
			Allspiders[sp.Code] = sp
			go sp.StartJob()
			res = "添加任务成功!"
		} else {
			res = "请输入正确的lua脚本名称(spider_test.lua)!"
		}
	}
	i.ServeJson(&res)
	return nil
}

// delete spider
func (i *Front) DelSpider(code string) error {
	flag := "fail"
	msg := "del " + code
	Allspiders.Range(func(k, value interface{}) bool {
		v := value.(*Spider)
		if lu.Config.Working == 0 {
			if k == code {
				v.Stop = true
				v.L.Close()
				os.Remove(v.ScriptFile)
				Allspiders.Delete(k)
				flag = "ok"
				return false
			}
		} else {
		}
		return true
	})
	res := map[string]string{"msg": msg, "flag": flag}
	i.ServeJson(&res)
	return nil
}

// add job
func (i *Front) AddJob() error {
	res := ""
	succ, fail := 0, 0
	jobs := i.GetString("jobs")
	list := []interface{}{}
	json.Unmarshal([]byte(jobs), &list)
	if len(list) < 1 {
		res = "数据格式不正确!"
		i.ServeJson(&res)
		return nil
	}
	listJob := []interface{}{}
	var sp *Spider
	for _, v := range list {
		tmp := v.(map[string]interface{})
		code, ok1 := tmp["code"].(string)
		_, ok2 := tmp["href"].(string)
		_, ok3 := tmp["title"].(string)
		if !ok1 || !ok2 || !ok3 {
			fail++
			continue
		}
		if s, ok := Allspiders.Load(code); ok {
			sp = s.(*Spider)
			p := make(map[string]string)
			for key, value := range tmp {
				p[key] = util.ObjToString(value)
			}
			p["publishtime"] = fmt.Sprint(time.Now().Unix())
			listJob = append(listJob, p)
			succ++
		} else {
			fail++
		}
	}
	if succ > 0 {
		go mu.NewGoThread(int(sp.Thread)).Run(sp.DownloadDetailByNames, listJob...)
		res += "添加成功:" + fmt.Sprint(succ) + "条任务!"
	}
	if fail > 0 {
		res += "添加失败:" + fmt.Sprint(fail) + "条任务!"
	}
	i.ServeJson(&res)
	return nil
}
*/

// SearchJob returns one page of the spider list as JSON, filtered by the
// "qcode" and "state" request parameters.
func (i *Front) SearchJob() error {
	qcode := i.GetString("qcode")
	currPage, _ := i.GetInt("currPage")
	state := i.GetString("state")
	res := spiderListPage(qcode, state, currPage)
	i.ServeJson(&res)
	return nil
}

// ViewErrIndex renders the error view for the spider identified by code. When
// a "spider_ldtime" record with more than one field exists for that code, its
// "lastpubtimestr" value is exposed to the template as "lastpubtime".
func (i *Front) ViewErrIndex(code string) error {
	i.T["code"] = code
	tmp, _ := MgoS.FindOne("spider_ldtime", map[string]interface{}{"code": code})
	if tmp != nil && len(*tmp) > 1 {
		i.T["lastpubtime"] = (*tmp)["lastpubtimestr"]
	}
	return i.Render("viewErr.html", &i.T)
}

// ViewErrDetail returns, as JSON, the counters of the spider named by the
// "code" request parameter (when it is loaded) together with its
// "spider_errlog" entries sorted by "comeintime" descending. Entries without
// a URL are labelled as monitoring log entries.
func (i *Front) ViewErrDetail() error {
	code := i.GetString("code")
	res := make(map[string]interface{})
	if value, ok := Allspiders.Load(code); ok {
		v := value.(*Spider)
		res["name"] = v.Name
		res["errorNum"] = v.ErrorNum
		res["lastDowncount"] = v.LastDowncount
		res["lastExecTime"] = time.Unix(util.Int64All(v.LastExecTime), 0).Format(util.Date_Full_Layout)
		//res["totalDowncount"] = v.TotalDowncount
		res["todayDowncount"] = v.TodayDowncount
		res["yestoDayDowncount"] = v.YesterdayDowncount
		res["todayRequest"] = v.ToDayRequestNum
		//res["totalRequest"] = v.TotalRequestNum
		res["yestoDayRequest"] = v.YestoDayRequestNum
		res["lowlimit"] = v.LowerLimit
		res["uplimit"] = v.UpperLimit
	}
	list, _ := MgoS.Find("spider_errlog",
		map[string]interface{}{"code": code},
		map[string]interface{}{"comeintime": -1},
		map[string]interface{}{"comeintime": 1, "url": 1, "content": 1},
		false, -1, -1)
	if list == nil {
		// Keep "list" present in the response even when the query yields nothing.
		res["list"] = []interface{}{}
	} else {
		for _, entry := range *list {
			entry["comeintime"] = time.Unix(util.Int64All(entry["comeintime"]), 0).Format(util.Date_Full_Layout)
			if entry["url"] == nil {
				entry["url"] = "监控错误日志"
			}
		}
		res["list"] = *list
	}
	i.ServeJson(&res)
	return nil
}

// ClearErr resets the error counter of the spider named by the "code" request
// parameter, deletes its "spider_errlog" entries, and returns the refreshed
// spider list page as JSON.
func (i *Front) ClearErr() error {
	code := i.GetString("code")
	qcode := i.GetString("qcode")
	state := i.GetString("state")
	currPage, _ := i.GetInt("currPage")
	if value, ok := Allspiders.Load(code); ok {
		v := value.(*Spider)
		atomic.StoreInt32(&v.ErrorNum, 0)
		MgoS.Del("spider_errlog", map[string]interface{}{"code": code})
	}
	// spiderListPage clamps currPage to at least 1, like the other list handlers.
	res := spiderListPage(qcode, state, currPage)
	i.ServeJson(&res)
	return nil
}

// UpdateDownLimit upserts the download-count limits of a spider in
// "spider_ldtime" and, on success, mirrors them onto the loaded spider. When
// "lastpubtime" is supplied it must match util.Date_Full_Layout; it is stored
// both as a Unix timestamp and as the original string. The JSON response is
// the boolean outcome; a malformed "lastpubtime" yields false without
// touching the database.
func (i *Front) UpdateDownLimit() error {
	code := i.GetString("code")
	uplimit, _ := i.GetInt("uplimit")
	lowlimit, _ := i.GetInt("lowlimit")
	lastpubtime := i.GetString("lastpubtime")

	set := map[string]interface{}{"uplimit": uplimit, "lowlimit": lowlimit}
	if lastpubtime != "" {
		pubTime, err := time.ParseInLocation(util.Date_Full_Layout, lastpubtime, time.Local)
		if err != nil {
			// Storing the zero time's Unix value would corrupt the record.
			log.Println("updateDownLimit: invalid lastpubtime:", err)
			i.ServeJson(false)
			return nil
		}
		set["lastpubtime"] = pubTime.Unix()
		set["lastpubtimestr"] = lastpubtime
	}
	b := MgoS.Update("spider_ldtime",
		map[string]interface{}{"code": code},
		map[string]interface{}{"$set": set},
		true, false)
	if b {
		if value, ok := Allspiders.Load(code); ok {
			v := value.(*Spider)
			v.UpperLimit = int(uplimit)
			v.LowerLimit = int(lowlimit)
		}
	}
	i.ServeJson(b)
	return nil
}

// getSpiders returns the currPage-th page (PageSize entries) of the loaded
// spiders as a slice of map[string]interface{} rows.
//
// code, when non-empty, keeps spiders whose key or Name contains it. state
// selects "stop", "pass", "run", "abnormal" (nothing downloaded yesterday
// while not stopped, or more than 200 requests per download yesterday) or
// "norequest" (no requests today); any other value keeps every spider. Rows
// are ordered by error count descending, then by code ascending. The result
// is never nil.
func getSpiders(code, state string, currPage int64) []interface{} {
	spiders := []interface{}{}
	Allspiders.Range(func(key, value interface{}) bool {
		v := value.(*Spider)
		keyStr := fmt.Sprint(key)
		if code != "" && !strings.Contains(keyStr, code) && !strings.Contains(v.Name, code) {
			return true
		}

		// Decide on the state filter before building the row.
		include := true
		switch state {
		case "stop":
			include = v.Stop
		case "pass":
			include = v.Pass
		case "run":
			include = !v.Stop && !v.Pass
		case "abnormal":
			if v.YesterdayDowncount == 0 {
				include = !v.Stop
			} else {
				include = v.YestoDayRequestNum/v.YesterdayDowncount > 200
			}
		case "norequest":
			include = v.ToDayRequestNum == 0
		}
		if !include {
			return true
		}

		started := time.Unix(v.LastExecTime, 0)
		finished := "0"
		if v.ExecuteOkTime != 0 {
			finished = time.Unix(v.ExecuteOkTime, 0).Format(util.Date_Full_Layout)
		}
		spiders = append(spiders, map[string]interface{}{
			"code":             keyStr,
			"name":             v.Name,
			"stop":             v.Stop,
			"pass":             v.Pass,
			"filepath":         v.ScriptFile,
			"errnum":           v.Script.ErrorNum,
			"lstate":           v.L.Status(v.L),
			"lastTime":         "开始时间:" + started.Format(util.Date_Full_Layout) + ",结束时间:" + finished,
			"lastTimeShort":    started.Format("15:04:05"),
			"lastDownNum":      v.LastDowncount,
			"todayDownNum":     v.TodayDowncount,
			"yesterdayDownNum": v.YesterdayDowncount,
			"totalDowncount":   v.TotalDowncount,
			"totalRequest":     v.TotalRequestNum,
			"todayRequest":     v.ToDayRequestNum,
			"yestoDayRequest":  v.YestoDayRequestNum,
			"noDownloadNum":    v.NoDownloadNum,
			"lastHeartbeat":    time.Unix(v.LastHeartbeat, 0).Format(util.Date_Full_Layout),
			"targetChannelUrl": v.TargetChannelUrl,
			"lowlimit":         v.LowerLimit,
			"uplimit":          v.UpperLimit,
			"userName":         v.UserName,
		})
		return true
	})

	// Both sorts are stable, so sorting by code first makes it the tie-breaker
	// for rows with equal error counts.
	insertSort(spiders, "code")
	insertSort(spiders, "errnum")

	first := (currPage - 1) * PageSize
	last := currPage * PageSize
	if first < 0 {
		first = 0
	}
	if n := int64(len(spiders)); last > n {
		last = n
	}
	if first >= last {
		return []interface{}{}
	}
	return spiders[first:last]
}

// insertSort sorts list in place with a stable insertion sort, keyed on the
// index field of each element (every element must be a map[string]interface{}).
// An int32 key sorts in descending order, a string key in ascending order;
// elements whose key has any other type are left where they are.
func insertSort(list []interface{}, index string) {
	for i := 1; i < len(list); i++ {
		cur := list[i].(map[string]interface{})
		j := i - 1
		switch key := cur[index].(type) {
		case int32:
			pivot := util.IntAll(key)
			for j >= 0 && util.IntAll(list[j].(map[string]interface{})[index]) < pivot {
				list[j+1] = list[j]
				j--
			}
		case string:
			for j >= 0 && list[j].(map[string]interface{})[index].(string) > key {
				list[j+1] = list[j]
				j--
			}
		default:
			continue
		}
		list[j+1] = cur
	}
}

// GetAllSpider exports the spiders held in memory as an xlsx download. It
// appends one statistics row to the first sheet of res/spidercodes.xlsx
// (LoopListPath total / non-nil / nil entries, Allspiders total / non-nil
// entries) and the sorted codes of both collections side by side to the
// second sheet, saves the result under a timestamped name in res/, serves
// it as an attachment and removes the generated file 30 seconds later.
func (i *Front) GetAllSpider() {
	defer util.Catch()

	// Allspiders
	allspidersLen, spidersLenOK := 0, 0
	allCodes := []string{}
	Allspiders.Range(func(_, value interface{}) bool {
		allspidersLen++
		if value != nil {
			spidersLenOK++
			allCodes = append(allCodes, value.(*Spider).Code)
		}
		return true
	})
	sort.Strings(allCodes)

	// LoopListPath
	sizeAll, sizeOK, sizeNo := 0, 0, 0
	loopCodes := []string{}
	LoopListPath.Range(func(_, v interface{}) bool {
		sizeAll++
		if v == nil {
			sizeNo++
			return true
		}
		sizeOK++
		// A value of another type yields a nil map and therefore an empty code.
		info, _ := v.(map[string]string)
		loopCodes = append(loopCodes, info["code"])
		return true
	})
	sort.Strings(loopCodes)

	// excel template
	xf, err := xlsx.OpenFile("res/spidercodes.xlsx")
	if err != nil {
		log.Println("spidercodes file not foud", err.Error())
		return
	}
	if len(xf.Sheets) < 2 {
		log.Println("spidercodes file must contain two sheets, found", len(xf.Sheets))
		return
	}

	// sheet 0: statistics
	stats := xf.Sheets[0].AddRow()
	stats.AddCell().SetValue(sizeAll)
	stats.AddCell().SetValue(sizeOK)
	stats.AddCell().SetValue(sizeNo)
	stats.AddCell().SetValue(allspidersLen)
	stats.AddCell().SetValue(spidersLenOK)

	// sheet 1: codes, one row per index; the shorter column is padded with "".
	codes := xf.Sheets[1]
	rows := len(loopCodes)
	if len(allCodes) > rows {
		rows = len(allCodes)
	}
	for j := 0; j < rows; j++ {
		loopCode, spiderCode := "", ""
		if j < len(loopCodes) {
			loopCode = loopCodes[j]
		}
		if j < len(allCodes) {
			spiderCode = allCodes[j]
		}
		row := codes.AddRow()
		row.AddCell().SetValue(loopCode)
		row.AddCell().SetValue(spiderCode)
	}

	name := fmt.Sprintf("爬虫代码%d.xlsx", time.Now().Unix())
	fname := "res/" + name
	if err := xf.Save(fname); err != nil {
		log.Println("save spider codes file:", err)
		return
	}
	i.ResponseWriter.Header().Add("Content-Disposition", fmt.Sprintf("attachment; filename=%s", name))
	i.ServeFile(fname)

	// Remove the generated file once the download has had time to finish.
	time.AfterFunc(30*time.Second, func() {
		if err := os.Remove(fname); err != nil {
			log.Println("remove spider codes file:", err)
		}
	})
}