/** 爬虫,脚本接口,需要扩展 */ package spider import ( "errors" "math/rand" mu "mfw/util" qu "qfw/util" util "spider_util" "time" "github.com/yuin/gopher-lua" ) //爬虫() type Spider struct { Script Code string //代码 Name string //站点名称 Channel string //栏目名称 DownDetail bool //是否下载详细页 LastPubshTime int64 //最后发布时间 LastDownloadTime int64 //最后下载时间 SpiderRunRate int64 //执行频率 ExecuteOk bool //任务执行成功/完成标志 Collection string //写入表名 CoverAttr string //判重字段 StoreMode int //存储模式 StoreToMsgEvent int //消息类型 SleepBase int //基本延时 SleepRand int //随机延时 TargetChannelUrl string //栏目页地址 SpiderStartPage, SpiderMaxPage int64 //页码配置 SpiderIsHistoricalMend bool SpiderIsMustDownload bool } //获取最新时间--作为最后更新时间 func (s *Spider) GetLastPublishTime() (timestr string, errs interface{}) { defer mu.Catch() s.Test_goreqtime++ if err := s.L.CallByParam(lua.P{ Fn: s.L.GetGlobal("getLastPublishTime"), NRet: 1, Protect: true, }); err != nil { errs = err.Error() return "", errs } ret := s.L.Get(-1) s.L.Pop(1) if str, ok := ret.(lua.LString); ok { timestr = string(str) } if s.LastPubshTime < util.ParseDate2Int64(timestr) { //防止发布时间超前 if util.ParseDate2Int64(timestr) > time.Now().Unix() { s.LastPubshTime = time.Now().Unix() } else { s.LastPubshTime = util.ParseDate2Int64(timestr) } } timestr = time.Unix(s.LastPubshTime, 0).Format(qu.Date_Full_Layout) return timestr, nil } //获取最新时间--作为最后更新时间 func (s *Spider) GetLastPublishTimeTest() (timestr interface{}, errs interface{}) { defer mu.Catch() if err := s.L.CallByParam(lua.P{ Fn: s.L.GetGlobal("getLastPublishTime"), NRet: 1, Protect: true, }); err != nil { errs = err.Error() return "", errs } ret := s.L.Get(-1) return ret, nil } //下载列表 func (s *Spider) DownListPageItem() (list []map[string]interface{}, errs interface{}) { defer mu.Catch() s.Test_goreqlist++ for ; s.SpiderStartPage <= s.SpiderMaxPage && !s.ExecuteOk; s.SpiderStartPage++ { if err := s.L.CallByParam(lua.P{ Fn: s.L.GetGlobal("downloadAndParseListPage"), NRet: 1, Protect: true, }, lua.LNumber(s.SpiderStartPage)); err != nil { errs = err.Error() } lv := s.L.Get(-1) s.L.Pop(1) if tbl, ok := lv.(*lua.LTable); ok { for i := 1; i <= tbl.Len(); i++ { v := tbl.RawGetInt(i).(*lua.LTable) tmp := util.GetTable(v) if qu.ObjToString(tmp["exit"]) == "true" { break } list = append(list, util.GetTable(v)) } } } return list, errs } //下载列表 func (s *Spider) DownListPageItemTest() (list []interface{}, errs interface{}) { defer mu.Catch() for ; s.SpiderStartPage <= s.SpiderMaxPage && !s.ExecuteOk; s.SpiderStartPage++ { if err := s.L.CallByParam(lua.P{ Fn: s.L.GetGlobal("downloadAndParseListPage"), NRet: 1, Protect: true, }, lua.LNumber(s.SpiderStartPage)); err != nil { errs = err.Error() } lv := s.L.Get(-1) s.L.Pop(1) if tbl, ok := lv.(*lua.LTable); ok { var fors = 0 for i := 1; i <= tbl.Len(); i++ { v, ok := tbl.RawGetInt(i).(*lua.LTable) if ok { tmp := util.GetTable(v) if qu.ObjToString(tmp["exit"]) == "true" { break } fors = -1 list = append(list, util.GetTable(v)) } } if fors == 0 { return []interface{}{util.GetTableEx(tbl)}, errors.New("no") } } else { return []interface{}{lv}, errors.New("no") } } return list, errs } //下载解析内容页 func (s *Spider) DownloadDetailPage(param map[string]string, data map[string]interface{}) (map[string]interface{}, interface{}) { defer mu.Catch() s.Test_goreqcon++ tab := s.L.NewTable() for k, v := range param { tab.RawSet(lua.LString(k), lua.LString(v)) } var err error if err = s.L.CallByParam(lua.P{ Fn: s.L.GetGlobal("downloadDetailPage"), NRet: 1, Protect: true, }, tab); err != nil { return data, err } lv := s.L.Get(-1) s.L.Pop(1) //拼map if v3, ok := lv.(*lua.LTable); ok { v3.ForEach(func(k, v lua.LValue) { if tmp, ok := k.(lua.LString); ok { key := string(tmp) if value, ok := v.(lua.LString); ok { data[key] = string(value) } else if value, ok := v.(lua.LNumber); ok { data[key] = value } else if value, ok := v.(*lua.LTable); ok { tmp := util.TableToMap(value) data[key] = tmp } } }) return data, err } else { return nil, err } } //下载解析内容页 func (s *Spider) DownloadDetailPageTest(param map[string]string, data map[string]interface{}) (map[string]interface{}, interface{}) { defer mu.Catch() tab := s.L.NewTable() for k, v := range param { tab.RawSet(lua.LString(k), lua.LString(v)) } //co := s.L.NewThread() //co.ScriptFileName = s.L.ScriptFileName //defer co.Close() var err error if err = s.L.CallByParam(lua.P{ Fn: s.L.GetGlobal("downloadDetailPage"), NRet: 1, Protect: true, }, tab); err != nil { return data, err } lv := s.L.Get(-1) s.L.Pop(1) var flag = 0 //拼map if v3, ok := lv.(*lua.LTable); ok { v3.ForEach(func(k, v lua.LValue) { if tmp, ok := k.(lua.LString); ok { key := string(tmp) if value, ok := v.(lua.LString); ok { data[key] = string(value) } else if value, ok := v.(lua.LNumber); ok { data[key] = value } else if value, ok := v.(*lua.LTable); ok { tmp := util.TableToMap(value) data[key] = tmp } } else { flag = -1 return } }) if flag == -1 { return map[string]interface{}{ "no": util.GetTableEx(lv.(*lua.LTable)), }, errors.New("no") } else { return data, err } } else { return map[string]interface{}{ "no": lv, }, errors.New("no") } } //获取随机数 func GetRandMath(num int) int { r := rand.New(rand.NewSource(time.Now().UnixNano())) return r.Intn(num) }