service.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
// Package spider generates spider scripts from stored configuration and runs them in trial mode.
  2. package spider
import (
	"encoding/json"
	"errors"
	"fmt"
	"os"
	"sort"
	"strings"
	"time"

	mu "mfw/util"
	qu "qfw/util"
	mongodb "qfw/util/mongodb"
	util "spiderutil"

	"gopkg.in/mgo.v2/bson"
)
  16. //获取脚本文件
  17. func GetScript(code string, str ...interface{}) (script, script_list, script_content string) {
  18. defer mu.Catch()
  19. //script := ""
  20. luaconfig := *mongodb.FindOne("luaconfig", `{"code":"`+code+`"}`)
  21. //qu.Debug(code, "lua---", luaconfig)
  22. if luaconfig["listcheck"] == nil {
  23. luaconfig["listcheck"] = ""
  24. }
  25. if luaconfig["contentcheck"] == nil {
  26. luaconfig["contentcheck"] = ""
  27. }
  28. if luaconfig != nil && len(luaconfig) > 0 {
  29. common := luaconfig["param_common"].([]interface{})
  30. if len(str) > 0 {
  31. if len(common) == 15 {
  32. common = append(common, str[0], str[1], str[2])
  33. } else {
  34. common = append(common, false, false, str[0], str[1], str[2])
  35. }
  36. } else {
  37. if len(common) == 15 {
  38. common = append(common, "", "", "")
  39. } else {
  40. common = append(common, false, false, "", "", "")
  41. }
  42. }
  43. for k, v := range common {
  44. if k == 4 || k == 5 || k == 6 || k == 9 || k == 10 {
  45. common[k] = qu.IntAll(v)
  46. }
  47. }
  48. script, _ = GetTmpModel(map[string][]interface{}{"common": common})
  49. script_time := ""
  50. if luaconfig["type_time"] == 0 {
  51. time := luaconfig["param_time"].([]interface{})
  52. script_time, _ = GetTmpModel(map[string][]interface{}{
  53. "time": time,
  54. })
  55. } else {
  56. script_time = luaconfig["str_time"].(string)
  57. }
  58. //script_list := "" //列表页
  59. if luaconfig["type_list"] == 0 {
  60. list := luaconfig["param_list"].([]interface{})
  61. addrs := strings.Split(list[1].(string), "\n")
  62. if len(addrs) > 0 {
  63. for k, v := range addrs {
  64. addrs[k] = "'" + v + "'"
  65. }
  66. list[1] = strings.Join(addrs, ",")
  67. } else {
  68. list[1] = ""
  69. }
  70. script_list, _ = GetTmpModel(map[string][]interface{}{
  71. "list": list,
  72. "listcheck": []interface{}{luaconfig["listcheck"]},
  73. })
  74. } else {
  75. script_list = luaconfig["str_list"].(string)
  76. }
  77. //script_content := "" //三级页
  78. if luaconfig["type_content"] == 0 {
  79. content := luaconfig["param_content"].([]interface{})
  80. script_content, _ = GetTmpModel(map[string][]interface{}{
  81. "content": content,
  82. "contentcheck": []interface{}{luaconfig["contentcheck"]},
  83. })
  84. } else {
  85. script_content = luaconfig["str_content"].(string)
  86. }
  87. script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"])
  88. script += `
  89. ` + script_time + `
  90. ` + script_list + `
  91. ` + script_content
  92. script = ReplaceModel(script, common, luaconfig["model"].(map[string]interface{}))
  93. }
  94. return
  95. }
  96. //保存更新脚本
  97. func SaveSpider(code string, param map[string]interface{}) bool {
  98. return mongodb.Update("luaconfig", bson.M{"code": code}, map[string]interface{}{"$set": param}, true, true)
  99. }
  100. /*获取最后发布时间
  101. comm 通用参数
  102. param 向导参数
  103. proficient 专家脚本
  104. guideType 向导类型
  105. */
  106. func GetLastPublishTime(comm, param []interface{}, proficient, downloadnode string, guideType int, scripts ...int) (timestr interface{}, err interface{}) {
  107. defer mu.Catch()
  108. var script string
  109. if guideType == 0 {
  110. script, err = GetTmpModel(map[string][]interface{}{
  111. "common": comm,
  112. "time": param,
  113. })
  114. } else {
  115. script, err = GetTmpModel(map[string][]interface{}{
  116. "common": comm,
  117. })
  118. script += proficient
  119. }
  120. if len(scripts) > 0 {
  121. return "", errors.New(script).Error()
  122. }
  123. if err != nil {
  124. return "", err
  125. }
  126. sp := CreateSpider(downloadnode, script)
  127. defer sp.L.Close()
  128. timestr, err = sp.GetLastPublishTimeTest()
  129. return
  130. }
  131. /*获取列表信息
  132. comm 通用参数
  133. param 向导参数
  134. model 补充模型
  135. modeltype 模型类型
  136. proficient 专家脚本
  137. guideType 向导类型
  138. */
  139. func GetPageList(comm, param []interface{}, model map[string]interface{}, listcheck interface{}, proficient, downloadnode string, guideType int, scripts ...int) (list []interface{}, err interface{}) {
  140. defer mu.Catch()
  141. var script string
  142. if guideType == 0 {
  143. script, err = GetTmpModel(map[string][]interface{}{
  144. "common": comm,
  145. "list": param,
  146. "listcheck": []interface{}{listcheck},
  147. })
  148. script = ReplaceModel(script, comm, model)
  149. } else {
  150. script, err = GetTmpModel(map[string][]interface{}{
  151. "common": comm,
  152. })
  153. script += proficient
  154. }
  155. if len(scripts) > 0 {
  156. return nil, errors.New(script).Error()
  157. }
  158. if err != nil {
  159. return nil, err
  160. }
  161. sp := CreateSpider(downloadnode, script)
  162. sp.SpiderMaxPage = 1
  163. defer sp.L.Close()
  164. list, err = sp.DownListPageItemTest()
  165. return
  166. }
  167. /*获取三级页信息
  168. comm 通用参数
  169. param 向导参数
  170. proficient 专家脚本
  171. guideType 向导类型
  172. */
  173. func GetContentInfo(comm, param []interface{}, data map[string]interface{}, contentcheck interface{}, proficient, downloadnode string, guideType int, scripts ...int) (rep map[string]interface{}, err interface{}) {
  174. defer mu.Catch()
  175. var script string
  176. if guideType == 0 {
  177. script, err = GetTmpModel(map[string][]interface{}{
  178. "common": comm,
  179. "content": param,
  180. "contentcheck": []interface{}{contentcheck},
  181. })
  182. } else {
  183. script, err = GetTmpModel(map[string][]interface{}{
  184. "common": comm,
  185. })
  186. script += proficient
  187. }
  188. if len(scripts) > 0 {
  189. return nil, errors.New(script).Error()
  190. }
  191. if err != nil {
  192. return nil, err
  193. }
  194. sp := CreateSpider(downloadnode, script)
  195. sp.SpiderMaxPage = 1
  196. defer sp.L.Close()
  197. param2 := map[string]string{}
  198. for k, v := range data {
  199. param2[k] = fmt.Sprint(v)
  200. }
  201. rep, err = sp.DownloadDetailPageTest(param2, data)
  202. return
  203. }
  204. //补充模型
  205. func ReplaceModel(script string, comm []interface{}, model map[string]interface{}) string {
  206. //补充通用信息
  207. commstr := `item["spidercode"]="` + comm[0].(string) + `";`
  208. commstr += `item["site"]="` + comm[1].(string) + `";`
  209. commstr += `item["channel"]="` + comm[2].(string) + `";`
  210. script = strings.Replace(script, "--Common--", commstr, -1)
  211. //补充模型信息
  212. modelstr := ""
  213. for k, v := range model {
  214. modelstr += `item["` + k + `"]="` + v.(string) + `";`
  215. }
  216. script = strings.Replace(script, "--Model--", modelstr, -1)
  217. return script
  218. }
  219. //创建爬虫
  220. func CreateSpider(downloadnode, script string, isfile ...string) *Spider {
  221. defer mu.Catch()
  222. sp := &Spider{}
  223. sp.LoadScript(downloadnode, script, isfile...)
  224. sp.Code = sp.GetVar("spiderCode")
  225. sp.SCode = sp.Code
  226. sp.Name = sp.GetVar("spiderName")
  227. sp.Channel = sp.GetVar("spiderChannel")
  228. sp.DownDetail = sp.GetBoolVar("spiderDownDetailPage")
  229. sp.Collection = sp.GetVar("spider2Collection")
  230. sp.SpiderStartPage = int64(sp.GetIntVar("spiderStartPage"))
  231. sp.SpiderMaxPage = int64(sp.GetIntVar("spiderMaxPage"))
  232. sp.SpiderRunRate = int64(sp.GetIntVar("spiderRunRate"))
  233. sp.StoreToMsgEvent = sp.GetIntVar("spiderStoreToMsgEvent")
  234. sp.StoreMode = sp.GetIntVar("spiderStoreMode")
  235. sp.CoverAttr = sp.GetVar("spiderCoverAttr")
  236. spiderSleepBase := sp.GetIntVar("spiderSleepBase")
  237. if spiderSleepBase == -1 {
  238. sp.SleepBase = 1000
  239. } else {
  240. sp.SleepBase = spiderSleepBase
  241. }
  242. spiderSleepRand := sp.GetIntVar("spiderSleepRand")
  243. if spiderSleepRand == -1 {
  244. sp.SleepRand = 1000
  245. } else {
  246. sp.SleepRand = spiderSleepRand
  247. }
  248. spiderTimeout := sp.GetIntVar("spiderTimeout")
  249. if spiderTimeout == -1 {
  250. sp.Timeout = 60
  251. } else {
  252. sp.Timeout = int64(spiderTimeout)
  253. }
  254. sp.TargetChannelUrl = sp.GetVar("spiderTargetChannelUrl")
  255. sp.SpiderIsHistoricalMend = sp.GetBoolVar("spiderIsHistoricalMend")
  256. sp.SpiderIsMustDownload = sp.GetBoolVar("spiderIsMustDownload")
  257. //qu.Debug(sp.SpiderIsHistoricalMend, sp.SpiderIsMustDownload)
  258. return sp
  259. }
  260. //生成爬虫脚本
  261. func GetTmpModel(param map[string][]interface{}) (script string, err interface{}) {
  262. qu.Try(func() {
  263. if param != nil && param["common"] != nil {
  264. if len(param["common"]) < 12 {
  265. err = "公共参数配置不全"
  266. } else {
  267. script = fmt.Sprintf(util.Tmp_common, param["common"]...)
  268. }
  269. }
  270. if param != nil && param["time"] != nil {
  271. if len(param["time"]) < 3 {
  272. err = "方法:time-参数配置不全"
  273. } else {
  274. script += fmt.Sprintf(util.Tmp_pubtime, param["time"]...)
  275. }
  276. }
  277. if param != nil && param["list"] != nil {
  278. if len(param["list"]) < 7 {
  279. err = "方法:list-参数配置不全"
  280. } else {
  281. list := []interface{}{param["listcheck"][0]}
  282. list = append(list, param["list"]...)
  283. script += fmt.Sprintf(util.Tmp_pagelist, list...)
  284. script = strings.Replace(script, "#pageno#", `"..tostring(pageno).."`, -1)
  285. }
  286. }
  287. if param != nil && param["content"] != nil {
  288. if len(param["content"]) < 2 {
  289. err = "方法:content-参数配置不全"
  290. } else {
  291. content := []interface{}{param["contentcheck"][0]}
  292. content = append(content, param["content"]...)
  293. script += fmt.Sprintf(util.Tmp_content, content...)
  294. }
  295. }
  296. }, func(e interface{}) {
  297. err = e
  298. })
  299. return script, err
  300. }
  301. //生成文件
  302. func CreateFile(code, script string) (string, error) {
  303. filepath := "res/" + time.Now().Format("2006/01/02")
  304. err := os.MkdirAll(filepath, 0777)
  305. f, err := os.Create(filepath + "/spider_" + code + ".lua")
  306. defer f.Close()
  307. f.WriteString(script)
  308. return filepath, err
  309. }
  310. //上传脚本
  311. func UpdateSpiderByCodeState(code, state string, event int) (bool, error) {
  312. msgid := mu.UUID(8)
  313. data := map[string]interface{}{}
  314. data["code"] = code
  315. data["state"] = state
  316. rep := map[string]interface{}{}
  317. var bs []byte
  318. var err error
  319. if util.Config.Uploadevents[fmt.Sprint(event)] == "bid" { //?
  320. bs, err = MsclientBid.Call("", msgid, event, mu.SENDTO_TYPE_ALL_RECIVER, data, 60)
  321. } else {
  322. bs, err = Msclient.Call("", msgid, event, mu.SENDTO_TYPE_ALL_RECIVER, data, 60)
  323. }
  324. if err != nil {
  325. return false, err
  326. } else {
  327. json.Unmarshal(bs, &rep)
  328. b, _ := rep["b"].(bool)
  329. if !b {
  330. err = errors.New(qu.ObjToString(rep["err"]))
  331. }
  332. return b, err
  333. }
  334. }