package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os"
	"time"

	"github.com/xuri/excelize/v2"
	qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"

	"spider_creator/backend/script"
)

// QlmStopDownloadData stops the current download by closing the browser tabs.
// record is only logged. The returned Result always has Err set to 1, the
// value the export methods in this file use to signal success.
func (a *App) QlmStopDownloadData(record map[string]interface{}) *Result {
	r := &Result{}
	qu.Debug(record)
	glvm.CloseTabs() // release browser resources
	r.Err = 1
	return r
}

// QlmListDataDownload starts a list-page download.
//
// It refuses to start when no user is logged in, when a script is already
// running, or when the "list" script cannot be loaded; in those cases only
// r.Msg is set. Otherwise it posts param and the current user to
// qlm/updateRecord and, if that call sets r.Err to 1, launches DownloadData
// in a goroutine.
func (a *App) QlmListDataDownload(param map[string]interface{}, record map[string]interface{}) *Result {
	qu.Debug(param, record)
	r := &Result{}
	if User == nil {
		r.Msg = "用户登录异常,请重新登录!"
		return r
	}
	if glvm.ScriptRunning {
		r.Msg = "同时只能执行一个脚本,请稍后再试!"
		return r
	}
	listScript := glvm.LoadScript("list")
	if listScript == "" {
		// The original message named the detail page here, which was a
		// copy-paste slip: this path loads the list script.
		r.Msg = "列表页采集脚本加载失败!"
		return r
	}
	getResult(map[string]interface{}{"param": param, "user": User}, r, "qlm/updateRecord")
	if r.Err == 1 {
		go DownloadData(record, listScript, "list") // download in the background
	}
	return r
}

// QlmDetailDataDownload starts a detail-page download.
//
// After the same login / running-script / script-load checks as the list
// download, it refills script.Datas with the pending items fetched through
// getData for record["recordid"]. When at least one item was fetched it sets
// r.Err to 1 and launches DownloadData in a goroutine; otherwise it reports
// that there is nothing to collect. param is only logged.
func (a *App) QlmDetailDataDownload(param map[string]interface{}, record map[string]interface{}) *Result {
	qu.Debug(param, record)
	r := &Result{}
	if User == nil {
		r.Msg = "用户登录异常,请重新登录!"
		qu.Debug(r.Msg)
		return r
	}
	if glvm.ScriptRunning {
		r.Msg = "同时只能执行一个脚本,请稍后再试!"
		return r
	}
	detailScript := glvm.LoadScript("detail")
	if detailScript == "" {
		r.Msg = "详情页采集脚本加载失败!"
		return r
	}
	script.Datas = []map[string]interface{}{}
	getData(nil, qu.ObjToString(record["recordid"]), "json", "download", &script.Datas)
	qu.Debug("获取待采数据量:", len(script.Datas))
	if len(script.Datas) == 0 {
		r.Msg = "无可采集数据!"
		return r
	}
	r.Err = 1
	go DownloadData(record, detailScript, "detail") // download in the background
	//getResult(map[string]interface{}{"param": param, "user": User}, r, "qlm/updateRecord")
	return r
}

// DownloadData runs scriptText for the given record and then reports the
// record's new state to qlm/updateRecord.
//
// glvm.ScriptRunning is set for the duration of the call. The proxyServer,
// headless and showImage entries of record are copied into glvm (false when
// missing or not a bool). When the script finishes without error the function
// waits, polling once per second, until script.DataCache is empty. The state
// sent is 2 for page "list", 5 for page "detail" and 0 for anything else,
// regardless of whether the script succeeded.
func DownloadData(record map[string]interface{}, scriptText, page string) {
	defer qu.Catch()
	glvm.ScriptRunning = true
	defer func() {
		glvm.ScriptRunning = false
	}()
	glvm.ProxyServer, _ = record["proxyServer"].(bool)
	glvm.Headless, _ = record["headless"].(bool)
	glvm.ShowImage, _ = record["showImage"].(bool)
	recordId := qu.ObjToString(record["recordid"])

	// Run the script.
	if err := glvm.RunScript(scriptText, recordId); err != nil {
		// Previously dropped silently; the state update below still happens.
		qu.Debug("Error running script:", err)
	} else {
		// Wait for the queued items to be consumed before updating the state.
		for len(script.DataCache) > 0 {
			qu.Debug("当前待保存数据量:", len(script.DataCache))
			time.Sleep(time.Second)
		}
	}

	state := 0
	switch page {
	case "list":
		state = 2
	case "detail":
		state = 5
	}
	r := &Result{}
	getResult(map[string]interface{}{"param": map[string]interface{}{"recordid": recordId, "state": state}}, r, "qlm/updateRecord")
}

// QlmExportExcelFile exports the data of recordId to an Excel file at
// filepath. The returned map holds "err" (1 on success, 0 on failure) and
// "msg" (a success text or the error message).
func (a *App) QlmExportExcelFile(filepath, recordId string) map[string]interface{} {
	qu.Debug("filepath---", filepath)
	if err := runExportExcelFile(filepath, recordId); err != nil {
		return map[string]interface{}{"err": 0, "msg": err.Error()}
	}
	return map[string]interface{}{"err": 1, "msg": "导出成功"}
}

// QlmExportJsonFile exports the data of recordId to a JSON file at filepath.
// The returned map holds "err" (1 on success, 0 on failure) and "msg" (a
// success text or the error message).
func (a *App) QlmExportJsonFile(filepath, recordId string) map[string]interface{} {
	qu.Debug("filepath---", filepath)
	if err := runQlmExportJsonFile(filepath, recordId); err != nil {
		return map[string]interface{}{"err": 0, "msg": err.Error()}
	}
	return map[string]interface{}{"err": 1, "msg": "导出成功"}
}

// runExportExcelFile builds a workbook with a header row in Sheet1, lets
// getData fill one row per fetched item, and saves it to filepath.
//
// NOTE(review): getData reports fetch failures only through the log, so a
// failed fetch still produces a header-only file and a nil error here.
func runExportExcelFile(filepath, recordId string) error {
	f := excelize.NewFile()
	defer f.Close()

	headers := []string{"ID", "标题", "链接", "发布时间", "重复", "详情页采集", "采集账号", "推送状态", "正文"}
	for i, title := range headers {
		cell := string(rune('A'+i)) + "1" // A1 .. I1
		if err := f.SetCellStr("Sheet1", cell, title); err != nil {
			return err
		}
	}
	getData(f, recordId, "excel", "export", nil)
	return f.SaveAs(filepath)
}

// runQlmExportJsonFile fetches the items of recordId through getData and
// writes them to filepath as an indented JSON array. When nothing was fetched
// the file contains "null".
func runQlmExportJsonFile(filepath, recordId string) error {
	var result []map[string]interface{}
	getData(nil, recordId, "json", "export", &result)
	jsonData, err := json.MarshalIndent(result, "", " ")
	if err != nil {
		return err
	}
	fo, err := os.Create(filepath)
	if err != nil {
		return err
	}
	if _, err := fo.Write(jsonData); err != nil {
		fo.Close()
		return fmt.Errorf("failed to write data to file: %w", err)
	}
	// Close can surface a delayed write error, so its result is returned
	// instead of being discarded by a defer.
	return fo.Close()
}

// getData posts {"param": {"recordid", "from"}} to the qlm/getData endpoint
// and consumes the response as a stream of JSON objects.
//
// With exportStype "excel" each object becomes one row of Sheet1 in file,
// starting at row 2; with "json" each object is appended to *result. Any other
// exportStype only counts the objects. Failures (encoding, request, non-200
// status, malformed stream) are logged and end the call; they are not
// returned to the caller.
func getData(file *excelize.File, recordId, exportStype, from string, result *[]map[string]interface{}) {
	// Refuse combinations that would dereference a nil target later on.
	if exportStype == "excel" && file == nil {
		qu.Debug("Error: excel export requested without a file")
		return
	}
	if exportStype == "json" && result == nil {
		qu.Debug("Error: json export requested without a result slice")
		return
	}

	// Encode the request parameters as JSON.
	param := map[string]interface{}{
		"recordid": recordId,
		"from":     from,
	}
	jsonData, err := json.Marshal(map[string]interface{}{"param": param})
	if err != nil {
		qu.Debug(err)
		return
	}

	// Build the HTTP POST request.
	req, err := http.NewRequest("POST", fmt.Sprintf(serverAddress, "qlm/getData"), bytes.NewBuffer(jsonData))
	if err != nil {
		// req is nil here; continuing would panic on req.Header.
		qu.Debug("Error creating request:", err)
		return
	}
	req.Header.Set("Content-Type", "application/json")

	// Send the request.
	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		// resp is nil here; continuing would panic on resp.Body.
		qu.Debug("Error making request:", err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Do not treat an error body as data rows.
		qu.Debug("Error: server returned status:", resp.StatusCode)
		return
	}

	// The server is expected to send one JSON object after another.
	decoder := json.NewDecoder(bufio.NewReader(resp.Body))
	n := 0
	for {
		var item map[string]interface{}
		if err := decoder.Decode(&item); err != nil {
			// io.EOF is the normal end of the stream. Any other error also
			// ends the loop: the decoder returns syntax and read errors
			// again on every later call, so retrying would spin forever.
			if err != io.EOF {
				qu.Debug(err)
			}
			break
		}
		n++
		switch exportStype {
		case "excel":
			qlmWriteExcelRow(file, n+1, item) // row 1 holds the headers
		case "json":
			*result = append(*result, item)
		}
	}
	qu.Debug(recordId, "共获取数据量:", n)
}

// qlmWriteExcelRow writes one fetched item into columns A-I of the given row
// of Sheet1. Cell write errors are logged and do not stop the export.
func qlmWriteExcelRow(file *excelize.File, row int, item map[string]interface{}) {
	rowStr := fmt.Sprint(row)
	set := func(col, value string) {
		if err := file.SetCellStr("Sheet1", col+rowStr, value); err != nil {
			qu.Debug(err)
		}
	}

	set("A", qu.ObjToString(item["_id"]))
	set("B", qu.ObjToString(item["title"]))
	set("C", qu.ObjToString(item["href"]))

	// publishtime is written as-is when it is a string; otherwise it is
	// converted to an int64 and formatted, with 0 giving an empty cell.
	publishText, isString := item["publishtime"].(string)
	if !isString {
		if ts := qu.Int64All(item["publishtime"]); ts != 0 {
			publishText = qu.FormatDateByInt64(&ts, qu.Date_Full_Layout)
		}
	}
	set("D", publishText)

	// rp absent: not checked; rp == true: duplicate; anything else: unique.
	repeatText := "未判重"
	if rp := item["rp"]; rp != nil {
		if dup, ok := rp.(bool); ok && dup {
			repeatText = "重复"
		} else {
			repeatText = "不重复"
		}
	}
	set("E", repeatText)

	stateText := "未采集"
	switch qu.IntAll(item["state"]) {
	case 1:
		stateText = "采集成功"
	case -1:
		stateText = "采集失败"
	}
	set("F", stateText)

	set("G", qu.ObjToString(item["username"]))

	pushText := "未推送"
	if qu.IntAll(item["pushstate"]) == 1 {
		pushText = "推送成功"
	}
	set("H", pushText)

	set("I", qu.ObjToString(item["detail"]))
}

// updateData saves every item received from script.DataCache by posting it
// to qlm/updateData and logs whether the save succeeded. It returns when the
// channel is closed; otherwise it runs forever.
func updateData() {
	for {
		data, ok := <-script.DataCache
		if !ok {
			// A closed channel would otherwise yield nil items in a busy loop.
			return
		}
		r := &Result{}
		getResult(map[string]interface{}{"param": data}, r, "qlm/updateData")
		if r.Err == 1 {
			qu.Debug("保存成功:", data["href"], data["title"])
		} else {
			qu.Debug("保存失败:", data["href"], data["title"])
		}
	}
}