qianlima.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244
  1. package main
  2. import (
  3. "bufio"
  4. "bytes"
  5. "encoding/json"
  6. "fmt"
  7. "github.com/xuri/excelize/v2"
  8. "io"
  9. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  10. "net/http"
  11. "os"
  12. "spider_creator/backend/script"
  13. "time"
  14. )
  15. // QlmListDataDownload 千里马列表页数据下载
  16. func (a *App) QlmListDataDownload(param map[string]interface{}, record map[string]interface{}) *Result {
  17. qu.Debug(param, record)
  18. r := &Result{}
  19. if User != nil {
  20. if !glvm.ScriptRunning {
  21. page := "list"
  22. detailScript := glvm.LoadScript("list")
  23. if detailScript != "" {
  24. //getResult(map[string]interface{}{"param": param, "user": User}, r, "qlm/updateRecord")
  25. //if r.Err == 1 {
  26. go DownloadData(record, detailScript, page) //下载
  27. //}
  28. } else {
  29. r.Msg = "详情页采集脚本加载失败!"
  30. }
  31. } else {
  32. r.Msg = "同时只能执行一个脚本,请稍后再试!"
  33. }
  34. } else {
  35. r.Msg = "用户登录异常,请重新登录!"
  36. qu.Debug(r.Msg)
  37. }
  38. return r
  39. }
  40. // QlmDetailDataDownload 千里马详情页数据下载
  41. func (a *App) QlmDetailDataDownload(param map[string]interface{}, record map[string]interface{}) *Result {
  42. qu.Debug(param, record)
  43. r := &Result{}
  44. if User != nil {
  45. if !glvm.ScriptRunning {
  46. page := "detail"
  47. detailScript := glvm.LoadScript("detail")
  48. if detailScript != "" {
  49. script.Datas = []map[string]interface{}{}
  50. getData(nil, qu.ObjToString(record["recordId"]), "json", "download", &script.Datas)
  51. if len(script.Datas) > 0 {
  52. r.Err = 1
  53. go DownloadData(record, detailScript, page) //下载
  54. } else {
  55. r.Msg = "未获取到列表页数据!"
  56. }
  57. //getResult(map[string]interface{}{"param": param, "user": User}, r, "qlm/updateRecord")
  58. } else {
  59. r.Msg = "详情页采集脚本加载失败!"
  60. }
  61. } else {
  62. r.Msg = "同时只能执行一个脚本,请稍后再试!"
  63. }
  64. } else {
  65. r.Msg = "用户登录异常,请重新登录!"
  66. qu.Debug(r.Msg)
  67. }
  68. return r
  69. }
  70. // DownloadData 执行脚本下载数据
  71. func DownloadData(record map[string]interface{}, scriptText, page string) {
  72. glvm.ScriptRunning = true
  73. defer func() {
  74. glvm.ScriptRunning = false
  75. }()
  76. glvm.ProxyServer, _ = record["proxyServer"].(bool)
  77. glvm.Headless, _ = record["headless"].(bool)
  78. glvm.ShowImage, _ = record["showImage"].(bool)
  79. recordId := qu.ObjToString(record["recordId"])
  80. //执行脚本
  81. glvm.RunScript(scriptText, recordId)
  82. for len(script.DataCache) > 0 {
  83. time.Sleep(time.Second * 1)
  84. }
  85. //state := 2
  86. //if page == "detail" {
  87. // state = 5
  88. //}
  89. //r := &Result{}
  90. //getResult(map[string]interface{}{"param": map[string]interface{}{"recordid": recordId, "state": state}}, r, "qlm/updateRecord")
  91. }
  92. // QlmRunExportExcelFile 导出excel
  93. func (a *App) QlmRunExportExcelFile(filepath, recordId string) error {
  94. qu.Debug("filepath---", filepath)
  95. f := excelize.NewFile()
  96. defer f.Close()
  97. f.SetCellStr("Sheet1", "A1", "ID")
  98. f.SetCellStr("Sheet1", "B1", "标题")
  99. f.SetCellStr("Sheet1", "C1", "链接")
  100. f.SetCellStr("Sheet1", "D1", "发布时间")
  101. f.SetCellStr("Sheet1", "E1", "重复")
  102. f.SetCellStr("Sheet1", "F1", "详情页采集")
  103. f.SetCellStr("Sheet1", "G1", "采集账号")
  104. f.SetCellStr("Sheet1", "H1", "推送状态")
  105. f.SetCellStr("Sheet1", "I1", "正文")
  106. getData(f, recordId, "excel", "export", nil)
  107. err := f.SaveAs(filepath)
  108. if err != nil {
  109. return err
  110. }
  111. return nil
  112. }
  113. // QlmRunExportJsonFile 导出json
  114. func (a *App) QlmRunExportJsonFile(filepath, recordId string) error {
  115. qu.Debug("filepath---", filepath)
  116. var result []map[string]interface{}
  117. getData(nil, recordId, "json", "export", &result)
  118. jsonData, err := json.MarshalIndent(result, "", " ")
  119. if err != nil {
  120. return err
  121. }
  122. fo, err := os.Create(filepath)
  123. if err != nil {
  124. return err
  125. }
  126. defer fo.Close()
  127. if _, err := fo.Write(jsonData); err != nil {
  128. return fmt.Errorf("failed to write data to file: %w", err)
  129. }
  130. return nil
  131. }
  132. func getData(file *excelize.File, recordId, exportStype, from string, result *[]map[string]interface{}) {
  133. // 将数据编码为JSON格式
  134. param := map[string]interface{}{
  135. "recordid": recordId, "from": from,
  136. }
  137. jsonData, err := json.Marshal(map[string]interface{}{"param": param})
  138. if err != nil {
  139. qu.Debug(err)
  140. }
  141. // 创建一个HTTP POST请求
  142. req, err := http.NewRequest("POST", fmt.Sprintf(serverAddress, "qlm/getData"), bytes.NewBuffer(jsonData))
  143. if err != nil {
  144. qu.Debug("Error creating request:", err)
  145. }
  146. // 设置请求头,表明发送的是JSON数据
  147. req.Header.Set("Content-Type", "application/json")
  148. // 发送HTTP请求并获取响应
  149. client := &http.Client{}
  150. resp, err := client.Do(req)
  151. if err != nil {
  152. qu.Debug("Error making request:", err)
  153. }
  154. defer resp.Body.Close()
  155. // 检查响应状态码
  156. if resp.StatusCode != http.StatusOK {
  157. qu.Debug("Error: server returned status:", resp.StatusCode)
  158. }
  159. // 创建一个bufio.Reader来逐行读取响应体(这里假设服务器发送的是逐条JSON对象)
  160. reader := bufio.NewReader(resp.Body)
  161. decoder := json.NewDecoder(reader)
  162. // 逐条读取并处理JSON数据
  163. n := 0
  164. index := 0
  165. for {
  166. var tmp map[string]interface{}
  167. // 尝试解码下一条JSON数据
  168. if err := decoder.Decode(&tmp); err != nil {
  169. // 检查是否是io.EOF错误,表示已经读取完所有数据
  170. if err == io.EOF {
  171. break
  172. }
  173. // 对于其他错误,打印错误信息并退出
  174. qu.Debug(err)
  175. } else {
  176. n++
  177. index++
  178. if exportStype == "excel" {
  179. indexStr := fmt.Sprint(index + 1)
  180. file.SetCellStr("Sheet1", "A"+indexStr, qu.ObjToString(tmp["_id"]))
  181. file.SetCellStr("Sheet1", "B"+indexStr, qu.ObjToString(tmp["title"]))
  182. file.SetCellStr("Sheet1", "C"+indexStr, qu.ObjToString(tmp["href"]))
  183. publishtime := qu.Int64All(tmp["publishtime"])
  184. if publishtime == 0 {
  185. file.SetCellStr("Sheet1", "D"+indexStr, "")
  186. } else {
  187. file.SetCellStr("Sheet1", "D"+indexStr, qu.FormatDateByInt64(&publishtime, qu.Date_Full_Layout))
  188. }
  189. repeatText := ""
  190. if repeat := tmp["repeat"]; repeat != nil {
  191. if repeatTmp, ok := repeat.(bool); ok && repeatTmp {
  192. repeatText = "重复"
  193. } else {
  194. repeatText = "不重复"
  195. }
  196. }
  197. file.SetCellStr("Sheet1", "E"+indexStr, repeatText)
  198. stateText := "未采集"
  199. state := qu.IntAll(tmp["state"])
  200. if state == 1 {
  201. stateText = "采集成功"
  202. } else if state == -1 {
  203. stateText = "采集失败"
  204. }
  205. file.SetCellStr("Sheet1", "F"+indexStr, stateText)
  206. file.SetCellStr("Sheet1", "G"+indexStr, qu.ObjToString(tmp["username"]))
  207. pushstateText := "未推送"
  208. if qu.IntAll(tmp["pushstate"]) == 1 {
  209. pushstateText = "推送成功"
  210. }
  211. file.SetCellStr("Sheet1", "H"+indexStr, pushstateText)
  212. file.SetCellStr("Sheet1", "I"+indexStr, qu.ObjToString(tmp["detail"]))
  213. } else if exportStype == "json" {
  214. *result = append(*result, tmp)
  215. }
  216. }
  217. }
  218. qu.Debug(recordId, "共获取数据量:", n)
  219. }
  220. // 保存数据
  221. func updateData() {
  222. for {
  223. select {
  224. case data := <-script.DataCache:
  225. r := &Result{}
  226. getResult(map[string]interface{}{"param": data}, r, "qlm/updateData")
  227. if r.Err == 1 {
  228. qu.Debug("保存成功:", data["href"], data["title"])
  229. } else {
  230. qu.Debug("保存失败:", data["href"], data["title"])
  231. }
  232. }
  233. }
  234. }