123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245 |
- /**
- 爬虫,脚本接口,需要扩展
- */
- package spider
- import (
- "errors"
- "math/rand"
- mu "mfw/util"
- qu "qfw/util"
- util "spider_util"
- "time"
- "github.com/yuin/gopher-lua"
- )
- //爬虫()
- type Spider struct {
- Script
- Code string //代码
- Name string //站点名称
- Channel string //栏目名称
- DownDetail bool //是否下载详细页
- LastPubshTime int64 //最后发布时间
- LastDownloadTime int64 //最后下载时间
- SpiderRunRate int64 //执行频率
- ExecuteOk bool //任务执行成功/完成标志
- Collection string //写入表名
- CoverAttr string //判重字段
- StoreMode int //存储模式
- StoreToMsgEvent int //消息类型
- SleepBase int //基本延时
- SleepRand int //随机延时
- TargetChannelUrl string //栏目页地址
- SpiderStartPage, SpiderMaxPage int64 //页码配置
- SpiderIsHistoricalMend bool
- SpiderIsMustDownload bool
- }
- //获取最新时间--作为最后更新时间
- func (s *Spider) GetLastPublishTime() (timestr string, errs interface{}) {
- defer mu.Catch()
- s.Test_goreqtime++
- if err := s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("getLastPublishTime"),
- NRet: 1,
- Protect: true,
- }); err != nil {
- errs = err.Error()
- return "", errs
- }
- ret := s.L.Get(-1)
- s.L.Pop(1)
- if str, ok := ret.(lua.LString); ok {
- timestr = string(str)
- }
- if s.LastPubshTime < util.ParseDate2Int64(timestr) {
- //防止发布时间超前
- if util.ParseDate2Int64(timestr) > time.Now().Unix() {
- s.LastPubshTime = time.Now().Unix()
- } else {
- s.LastPubshTime = util.ParseDate2Int64(timestr)
- }
- }
- timestr = time.Unix(s.LastPubshTime, 0).Format(qu.Date_Full_Layout)
- return timestr, nil
- }
- //获取最新时间--作为最后更新时间
- func (s *Spider) GetLastPublishTimeTest() (timestr interface{}, errs interface{}) {
- defer mu.Catch()
- if err := s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("getLastPublishTime"),
- NRet: 1,
- Protect: true,
- }); err != nil {
- errs = err.Error()
- return "", errs
- }
- ret := s.L.Get(-1)
- return ret, nil
- }
- //下载列表
- func (s *Spider) DownListPageItem() (list []map[string]interface{}, errs interface{}) {
- defer mu.Catch()
- s.Test_goreqlist++
- for ; s.SpiderStartPage <= s.SpiderMaxPage && !s.ExecuteOk; s.SpiderStartPage++ {
- if err := s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("downloadAndParseListPage"),
- NRet: 1,
- Protect: true,
- }, lua.LNumber(s.SpiderStartPage)); err != nil {
- errs = err.Error()
- }
- lv := s.L.Get(-1)
- s.L.Pop(1)
- if tbl, ok := lv.(*lua.LTable); ok {
- for i := 1; i <= tbl.Len(); i++ {
- v := tbl.RawGetInt(i).(*lua.LTable)
- tmp := util.GetTable(v)
- if qu.ObjToString(tmp["exit"]) == "true" {
- break
- }
- list = append(list, util.GetTable(v))
- }
- }
- }
- return list, errs
- }
- //下载列表
- func (s *Spider) DownListPageItemTest() (list []interface{}, errs interface{}) {
- defer mu.Catch()
- for ; s.SpiderStartPage <= s.SpiderMaxPage && !s.ExecuteOk; s.SpiderStartPage++ {
- if err := s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("downloadAndParseListPage"),
- NRet: 1,
- Protect: true,
- }, lua.LNumber(s.SpiderStartPage)); err != nil {
- errs = err.Error()
- }
- lv := s.L.Get(-1)
- s.L.Pop(1)
- if tbl, ok := lv.(*lua.LTable); ok {
- var fors = 0
- for i := 1; i <= tbl.Len(); i++ {
- v, ok := tbl.RawGetInt(i).(*lua.LTable)
- if ok {
- tmp := util.GetTable(v)
- if qu.ObjToString(tmp["exit"]) == "true" {
- break
- }
- fors = -1
- list = append(list, util.GetTable(v))
- }
- }
- if fors == 0 {
- return []interface{}{util.GetTableEx(tbl)}, errors.New("no")
- }
- } else {
- return []interface{}{lv}, errors.New("no")
- }
- }
- return list, errs
- }
- //下载解析内容页
- func (s *Spider) DownloadDetailPage(param map[string]string, data map[string]interface{}) (map[string]interface{}, interface{}) {
- defer mu.Catch()
- s.Test_goreqcon++
- tab := s.L.NewTable()
- for k, v := range param {
- tab.RawSet(lua.LString(k), lua.LString(v))
- }
- var err error
- if err = s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("downloadDetailPage"),
- NRet: 1,
- Protect: true,
- }, tab); err != nil {
- return data, err
- }
- lv := s.L.Get(-1)
- s.L.Pop(1)
- //拼map
- if v3, ok := lv.(*lua.LTable); ok {
- v3.ForEach(func(k, v lua.LValue) {
- if tmp, ok := k.(lua.LString); ok {
- key := string(tmp)
- if value, ok := v.(lua.LString); ok {
- data[key] = string(value)
- } else if value, ok := v.(lua.LNumber); ok {
- data[key] = value
- } else if value, ok := v.(*lua.LTable); ok {
- tmp := util.TableToMap(value)
- data[key] = tmp
- }
- }
- })
- return data, err
- } else {
- return nil, err
- }
- }
- //下载解析内容页
- func (s *Spider) DownloadDetailPageTest(param map[string]string, data map[string]interface{}) (map[string]interface{}, interface{}) {
- defer mu.Catch()
- tab := s.L.NewTable()
- for k, v := range param {
- tab.RawSet(lua.LString(k), lua.LString(v))
- }
- //co := s.L.NewThread()
- //co.ScriptFileName = s.L.ScriptFileName
- //defer co.Close()
- var err error
- if err = s.L.CallByParam(lua.P{
- Fn: s.L.GetGlobal("downloadDetailPage"),
- NRet: 1,
- Protect: true,
- }, tab); err != nil {
- return data, err
- }
- lv := s.L.Get(-1)
- s.L.Pop(1)
- var flag = 0
- //拼map
- if v3, ok := lv.(*lua.LTable); ok {
- v3.ForEach(func(k, v lua.LValue) {
- if tmp, ok := k.(lua.LString); ok {
- key := string(tmp)
- if value, ok := v.(lua.LString); ok {
- data[key] = string(value)
- } else if value, ok := v.(lua.LNumber); ok {
- data[key] = value
- } else if value, ok := v.(*lua.LTable); ok {
- tmp := util.TableToMap(value)
- data[key] = tmp
- }
- } else {
- flag = -1
- return
- }
- })
- if flag == -1 {
- return map[string]interface{}{
- "no": util.GetTableEx(lv.(*lua.LTable)),
- }, errors.New("no")
- } else {
- return data, err
- }
- } else {
- return map[string]interface{}{
- "no": lv,
- }, errors.New("no")
- }
- }
// GetRandMath returns a pseudo-random integer in the half-open range [0, num).
//
// A non-positive num yields 0: rand.Intn panics for n <= 0, and a zero value
// (for example an unset delay setting) should simply mean "no random part".
//
// Each call seeds a fresh generator from the current time in nanoseconds.
func GetRandMath(num int) int {
	if num <= 0 {
		return 0
	}
	r := rand.New(rand.NewSource(time.Now().UnixNano()))
	return r.Intn(num)
}
|