|
- /**
- 爬虫,脚本接口,需要扩展
- */
- package spider
- import (
- "errors"
- "math/rand"
- mu "mfw/util"
- qu "qfw/util"
- util "spider_util"
- "time"
- "github.com/yuin/gopher-lua"
- )
- //爬虫()
- type Spider struct {
- Script
- Code string //代码
- Name string //站点名称
- Channel string //栏目名称
- DownDetail bool //是否下载详细页
- LastPubshTime int64 //最后发布时间
- LastDownloadTime int64 //最后下载时间
- SpiderRunRate int64 //执行频率
- ExecuteOk bool //任务执行成功/完成标志
- Collection string //写入表名
- CoverAttr string //判重字段
- StoreMode int //存储模式
- StoreToMsgEvent int //消息类型
- SleepBase int //基本延时
- SleepRand int //随机延时
- TargetChannelUrl string //栏目页地址
- SpiderStartPage, SpiderMaxPage int64 //页码配置
- SpiderIsHistoricalMend bool
- SpiderIsMustDownload bool
- }
- //获取最新时间--作为最后更新时间
- func (s *Spider) GetLastPublishTime() (timestr string, errs interface{}) {
- defer mu.Catch()
- s.Test_goreqtime++
- if err := s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("getLastPublishTime"),
- NRet: 1,
- Protect: true,
- }); err != nil {
- errs = err.Error()
- return "", errs
- }
- ret := s.L.Get(-1)
- s.L.Pop(1)
- if str, ok := ret.(lua.LString); ok {
- timestr = string(str)
- }
- if s.LastPubshTime < util.ParseDate2Int64(timestr) {
- //防止发布时间超前
- if util.ParseDate2Int64(timestr) > time.Now().Unix() {
- s.LastPubshTime = time.Now().Unix()
- } else {
- s.LastPubshTime = util.ParseDate2Int64(timestr)
- }
- }
- timestr = time.Unix(s.LastPubshTime, 0).Format(qu.Date_Full_Layout)
- return timestr, nil
- }
- //获取最新时间--作为最后更新时间
- func (s *Spider) GetLastPublishTimeTest() (timestr interface{}, errs interface{}) {
- defer mu.Catch()
- if err := s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("getLastPublishTime"),
- NRet: 1,
- Protect: true,
- }); err != nil {
- errs = err.Error()
- return "", errs
- }
- ret := s.L.Get(-1)
- return ret, nil
- }
- //下载列表
- func (s *Spider) DownListPageItem() (list []map[string]interface{}, errs interface{}) {
- defer mu.Catch()
- s.Test_goreqlist++
- for ; s.SpiderStartPage <= s.SpiderMaxPage && !s.ExecuteOk; s.SpiderStartPage++ {
- if err := s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("downloadAndParseListPage"),
- NRet: 1,
- Protect: true,
- }, lua.LNumber(s.SpiderStartPage)); err != nil {
- errs = err.Error()
- }
- lv := s.L.Get(-1)
- s.L.Pop(1)
- if tbl, ok := lv.(*lua.LTable); ok {
- for i := 1; i <= tbl.Len(); i++ {
- v := tbl.RawGetInt(i).(*lua.LTable)
- tmp := util.GetTable(v)
- if qu.ObjToString(tmp["exit"]) == "true" {
- break
- }
- list = append(list, util.GetTable(v))
- }
- }
- }
- return list, errs
- }
- //下载列表
- func (s *Spider) DownListPageItemTest() (list []interface{}, errs interface{}) {
- defer mu.Catch()
- for ; s.SpiderStartPage <= s.SpiderMaxPage && !s.ExecuteOk; s.SpiderStartPage++ {
- if err := s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("downloadAndParseListPage"),
- NRet: 1,
- Protect: true,
- }, lua.LNumber(s.SpiderStartPage)); err != nil {
- errs = err.Error()
- }
- lv := s.L.Get(-1)
- s.L.Pop(1)
- if tbl, ok := lv.(*lua.LTable); ok {
- var fors = 0
- for i := 1; i <= tbl.Len(); i++ {
- v, ok := tbl.RawGetInt(i).(*lua.LTable)
- if ok {
- tmp := util.GetTable(v)
- if qu.ObjToString(tmp["exit"]) == "true" {
- break
- }
- fors = -1
- list = append(list, util.GetTable(v))
- }
- }
- if fors == 0 {
- return []interface{}{util.GetTableEx(tbl)}, errors.New("no")
- }
- } else {
- return []interface{}{lv}, errors.New("no")
- }
- }
- return list, errs
- }
- //下载解析内容页
- func (s *Spider) DownloadDetailPage(param map[string]string, data map[string]interface{}) (map[string]interface{}, interface{}) {
- defer mu.Catch()
- s.Test_goreqcon++
- tab := s.L.NewTable()
- for k, v := range param {
- tab.RawSet(lua.LString(k), lua.LString(v))
- }
- var err error
- if err = s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("downloadDetailPage"),
- NRet: 1,
- Protect: true,
- }, tab); err != nil {
- return data, err
- }
- lv := s.L.Get(-1)
- s.L.Pop(1)
- //拼map
- if v3, ok := lv.(*lua.LTable); ok {
- v3.ForEach(func(k, v lua.LValue) {
- if tmp, ok := k.(lua.LString); ok {
- key := string(tmp)
- if value, ok := v.(lua.LString); ok {
- data[key] = string(value)
- } else if value, ok := v.(lua.LNumber); ok {
- data[key] = value
- } else if value, ok := v.(*lua.LTable); ok {
- tmp := util.TableToMap(value)
- data[key] = tmp
- }
- }
- })
- return data, err
- } else {
- return nil, err
- }
- }
- //下载解析内容页
- func (s *Spider) DownloadDetailPageTest(param map[string]string, data map[string]interface{}) (map[string]interface{}, interface{}) {
- defer mu.Catch()
- tab := s.L.NewTable()
- for k, v := range param {
- tab.RawSet(lua.LString(k), lua.LString(v))
- }
- //co := s.L.NewThread()
- //co.ScriptFileName = s.L.ScriptFileName
- //defer co.Close()
- var err error
- if err = s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("downloadDetailPage"),
- NRet: 1,
- Protect: true,
- }, tab); err != nil {
- return data, err
- }
- lv := s.L.Get(-1)
- s.L.Pop(1)
- var flag = 0
- //拼map
- if v3, ok := lv.(*lua.LTable); ok {
- v3.ForEach(func(k, v lua.LValue) {
- if tmp, ok := k.(lua.LString); ok {
- key := string(tmp)
- if value, ok := v.(lua.LString); ok {
- data[key] = string(value)
- } else if value, ok := v.(lua.LNumber); ok {
- data[key] = value
- } else if value, ok := v.(*lua.LTable); ok {
- tmp := util.TableToMap(value)
- data[key] = tmp
- }
- } else {
- flag = -1
- return
- }
- })
- if flag == -1 {
- return map[string]interface{}{
- "no": util.GetTableEx(lv.(*lua.LTable)),
- }, errors.New("no")
- } else {
- return data, err
- }
- } else {
- return map[string]interface{}{
- "no": lv,
- }, errors.New("no")
- }
- }
// GetRandMath returns a pseudo-random integer in the half-open range
// [0, num), drawn from a generator seeded with the current time in
// nanoseconds.
//
// rand.Intn panics when its argument is not positive, so a num of zero or
// less (for example an unset random delay) yields 0 instead of panicking.
func GetRandMath(num int) int {
	if num <= 0 {
		return 0
	}
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	return r.Intn(num)
}
|