spider.go 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. /**
  2. 爬虫,脚本接口,需要扩展
  3. */
  4. package spider
  5. import (
  6. "errors"
  7. "math/rand"
  8. mu "mfw/util"
  9. qu "qfw/util"
  10. util "spider_util"
  11. "time"
  12. "github.com/yuin/gopher-lua"
  13. )
  14. //爬虫()
  15. type Spider struct {
  16. Script
  17. Code string //代码
  18. Name string //站点名称
  19. Channel string //栏目名称
  20. DownDetail bool //是否下载详细页
  21. LastPubshTime int64 //最后发布时间
  22. LastDownloadTime int64 //最后下载时间
  23. SpiderRunRate int64 //执行频率
  24. ExecuteOk bool //任务执行成功/完成标志
  25. Collection string //写入表名
  26. CoverAttr string //判重字段
  27. StoreMode int //存储模式
  28. StoreToMsgEvent int //消息类型
  29. SleepBase int //基本延时
  30. SleepRand int //随机延时
  31. TargetChannelUrl string //栏目页地址
  32. SpiderStartPage, SpiderMaxPage int64 //页码配置
  33. SpiderIsHistoricalMend bool
  34. SpiderIsMustDownload bool
  35. }
  36. //获取最新时间--作为最后更新时间
  37. func (s *Spider) GetLastPublishTime() (timestr string, errs interface{}) {
  38. defer mu.Catch()
  39. s.Test_goreqtime++
  40. if err := s.L.CallByParam(lua.P{
  41. Fn: s.L.GetGlobal("getLastPublishTime"),
  42. NRet: 1,
  43. Protect: true,
  44. }); err != nil {
  45. errs = err.Error()
  46. return "", errs
  47. }
  48. ret := s.L.Get(-1)
  49. s.L.Pop(1)
  50. if str, ok := ret.(lua.LString); ok {
  51. timestr = string(str)
  52. }
  53. if s.LastPubshTime < util.ParseDate2Int64(timestr) {
  54. //防止发布时间超前
  55. if util.ParseDate2Int64(timestr) > time.Now().Unix() {
  56. s.LastPubshTime = time.Now().Unix()
  57. } else {
  58. s.LastPubshTime = util.ParseDate2Int64(timestr)
  59. }
  60. }
  61. timestr = time.Unix(s.LastPubshTime, 0).Format(qu.Date_Full_Layout)
  62. return timestr, nil
  63. }
  64. //获取最新时间--作为最后更新时间
  65. func (s *Spider) GetLastPublishTimeTest() (timestr interface{}, errs interface{}) {
  66. defer mu.Catch()
  67. if err := s.L.CallByParam(lua.P{
  68. Fn: s.L.GetGlobal("getLastPublishTime"),
  69. NRet: 1,
  70. Protect: true,
  71. }); err != nil {
  72. errs = err.Error()
  73. return "", errs
  74. }
  75. ret := s.L.Get(-1)
  76. return ret, nil
  77. }
  78. //下载列表
  79. func (s *Spider) DownListPageItem() (list []map[string]interface{}, errs interface{}) {
  80. defer mu.Catch()
  81. s.Test_goreqlist++
  82. for ; s.SpiderStartPage <= s.SpiderMaxPage && !s.ExecuteOk; s.SpiderStartPage++ {
  83. if err := s.L.CallByParam(lua.P{
  84. Fn: s.L.GetGlobal("downloadAndParseListPage"),
  85. NRet: 1,
  86. Protect: true,
  87. }, lua.LNumber(s.SpiderStartPage)); err != nil {
  88. errs = err.Error()
  89. }
  90. lv := s.L.Get(-1)
  91. s.L.Pop(1)
  92. if tbl, ok := lv.(*lua.LTable); ok {
  93. for i := 1; i <= tbl.Len(); i++ {
  94. v := tbl.RawGetInt(i).(*lua.LTable)
  95. tmp := util.GetTable(v)
  96. if qu.ObjToString(tmp["exit"]) == "true" {
  97. break
  98. }
  99. list = append(list, util.GetTable(v))
  100. }
  101. }
  102. }
  103. return list, errs
  104. }
  105. //下载列表
  106. func (s *Spider) DownListPageItemTest() (list []interface{}, errs interface{}) {
  107. defer mu.Catch()
  108. for ; s.SpiderStartPage <= s.SpiderMaxPage && !s.ExecuteOk; s.SpiderStartPage++ {
  109. if err := s.L.CallByParam(lua.P{
  110. Fn: s.L.GetGlobal("downloadAndParseListPage"),
  111. NRet: 1,
  112. Protect: true,
  113. }, lua.LNumber(s.SpiderStartPage)); err != nil {
  114. errs = err.Error()
  115. }
  116. lv := s.L.Get(-1)
  117. s.L.Pop(1)
  118. if tbl, ok := lv.(*lua.LTable); ok {
  119. var fors = 0
  120. for i := 1; i <= tbl.Len(); i++ {
  121. v, ok := tbl.RawGetInt(i).(*lua.LTable)
  122. if ok {
  123. tmp := util.GetTable(v)
  124. if qu.ObjToString(tmp["exit"]) == "true" {
  125. break
  126. }
  127. fors = -1
  128. list = append(list, util.GetTable(v))
  129. }
  130. }
  131. if fors == 0 {
  132. return []interface{}{util.GetTableEx(tbl)}, errors.New("no")
  133. }
  134. } else {
  135. return []interface{}{lv}, errors.New("no")
  136. }
  137. }
  138. return list, errs
  139. }
  140. //下载解析内容页
  141. func (s *Spider) DownloadDetailPage(param map[string]string, data map[string]interface{}) (map[string]interface{}, interface{}) {
  142. defer mu.Catch()
  143. s.Test_goreqcon++
  144. tab := s.L.NewTable()
  145. for k, v := range param {
  146. tab.RawSet(lua.LString(k), lua.LString(v))
  147. }
  148. var err error
  149. if err = s.L.CallByParam(lua.P{
  150. Fn: s.L.GetGlobal("downloadDetailPage"),
  151. NRet: 1,
  152. Protect: true,
  153. }, tab); err != nil {
  154. return data, err
  155. }
  156. lv := s.L.Get(-1)
  157. s.L.Pop(1)
  158. //拼map
  159. if v3, ok := lv.(*lua.LTable); ok {
  160. v3.ForEach(func(k, v lua.LValue) {
  161. if tmp, ok := k.(lua.LString); ok {
  162. key := string(tmp)
  163. if value, ok := v.(lua.LString); ok {
  164. data[key] = string(value)
  165. } else if value, ok := v.(lua.LNumber); ok {
  166. data[key] = value
  167. } else if value, ok := v.(*lua.LTable); ok {
  168. tmp := util.TableToMap(value)
  169. data[key] = tmp
  170. }
  171. }
  172. })
  173. return data, err
  174. } else {
  175. return nil, err
  176. }
  177. }
  178. //下载解析内容页
  179. func (s *Spider) DownloadDetailPageTest(param map[string]string, data map[string]interface{}) (map[string]interface{}, interface{}) {
  180. defer mu.Catch()
  181. tab := s.L.NewTable()
  182. for k, v := range param {
  183. tab.RawSet(lua.LString(k), lua.LString(v))
  184. }
  185. //co := s.L.NewThread()
  186. //co.ScriptFileName = s.L.ScriptFileName
  187. //defer co.Close()
  188. var err error
  189. if err = s.L.CallByParam(lua.P{
  190. Fn: s.L.GetGlobal("downloadDetailPage"),
  191. NRet: 1,
  192. Protect: true,
  193. }, tab); err != nil {
  194. return data, err
  195. }
  196. lv := s.L.Get(-1)
  197. s.L.Pop(1)
  198. var flag = 0
  199. //拼map
  200. if v3, ok := lv.(*lua.LTable); ok {
  201. v3.ForEach(func(k, v lua.LValue) {
  202. if tmp, ok := k.(lua.LString); ok {
  203. key := string(tmp)
  204. if value, ok := v.(lua.LString); ok {
  205. data[key] = string(value)
  206. } else if value, ok := v.(lua.LNumber); ok {
  207. data[key] = value
  208. } else if value, ok := v.(*lua.LTable); ok {
  209. tmp := util.TableToMap(value)
  210. data[key] = tmp
  211. }
  212. } else {
  213. flag = -1
  214. return
  215. }
  216. })
  217. if flag == -1 {
  218. return map[string]interface{}{
  219. "no": util.GetTableEx(lv.(*lua.LTable)),
  220. }, errors.New("no")
  221. } else {
  222. return data, err
  223. }
  224. } else {
  225. return map[string]interface{}{
  226. "no": lv,
  227. }, errors.New("no")
  228. }
  229. }
  230. //获取随机数
  231. func GetRandMath(num int) int {
  232. r := rand.New(rand.NewSource(time.Now().UnixNano()))
  233. return r.Intn(num)
  234. }