package main import ( "bufio" "bytes" "encoding/json" "fmt" "github.com/xuri/excelize/v2" "io" qu "jygit.jydev.jianyu360.cn/data_processing/common_utils" "net/http" "os" "spider_creator/backend/script" "time" ) // QlmListDataDownload 千里马列表页数据下载 func (a *App) QlmListDataDownload(param map[string]interface{}, record map[string]interface{}) *Result { qu.Debug(param, record) r := &Result{} if User != nil { if !glvm.ScriptRunning { page := "list" detailScript := glvm.LoadScript("list") if detailScript != "" { //getResult(map[string]interface{}{"param": param, "user": User}, r, "qlm/updateRecord") //if r.Err == 1 { go DownloadData(record, detailScript, page) //下载 //} } else { r.Msg = "详情页采集脚本加载失败!" } } else { r.Msg = "同时只能执行一个脚本,请稍后再试!" } } else { r.Msg = "用户登录异常,请重新登录!" qu.Debug(r.Msg) } return r } // QlmDetailDataDownload 千里马详情页数据下载 func (a *App) QlmDetailDataDownload(param map[string]interface{}, record map[string]interface{}) *Result { qu.Debug(param, record) r := &Result{} if User != nil { if !glvm.ScriptRunning { page := "detail" detailScript := glvm.LoadScript("detail") if detailScript != "" { script.Datas = []map[string]interface{}{} getData(nil, qu.ObjToString(record["recordId"]), "json", "download", &script.Datas) if len(script.Datas) > 0 { r.Err = 1 go DownloadData(record, detailScript, page) //下载 } else { r.Msg = "未获取到列表页数据!" } //getResult(map[string]interface{}{"param": param, "user": User}, r, "qlm/updateRecord") } else { r.Msg = "详情页采集脚本加载失败!" } } else { r.Msg = "同时只能执行一个脚本,请稍后再试!" } } else { r.Msg = "用户登录异常,请重新登录!" qu.Debug(r.Msg) } return r } // DownloadData 执行脚本下载数据 func DownloadData(record map[string]interface{}, scriptText, page string) { glvm.ScriptRunning = true defer func() { glvm.ScriptRunning = false }() glvm.ProxyServer, _ = record["proxyServer"].(bool) glvm.Headless, _ = record["headless"].(bool) glvm.ShowImage, _ = record["showImage"].(bool) recordId := qu.ObjToString(record["recordId"]) //执行脚本 glvm.RunScript(scriptText, recordId) for len(script.DataCache) > 0 { time.Sleep(time.Second * 1) } //state := 2 //if page == "detail" { // state = 5 //} //r := &Result{} //getResult(map[string]interface{}{"param": map[string]interface{}{"recordid": recordId, "state": state}}, r, "qlm/updateRecord") } // QlmRunExportExcelFile 导出excel func (a *App) QlmRunExportExcelFile(filepath, recordId string) error { qu.Debug("filepath---", filepath) f := excelize.NewFile() defer f.Close() f.SetCellStr("Sheet1", "A1", "ID") f.SetCellStr("Sheet1", "B1", "标题") f.SetCellStr("Sheet1", "C1", "链接") f.SetCellStr("Sheet1", "D1", "发布时间") f.SetCellStr("Sheet1", "E1", "重复") f.SetCellStr("Sheet1", "F1", "详情页采集") f.SetCellStr("Sheet1", "G1", "采集账号") f.SetCellStr("Sheet1", "H1", "推送状态") f.SetCellStr("Sheet1", "I1", "正文") getData(f, recordId, "excel", "export", nil) err := f.SaveAs(filepath) if err != nil { return err } return nil } // QlmRunExportJsonFile 导出json func (a *App) QlmRunExportJsonFile(filepath, recordId string) error { qu.Debug("filepath---", filepath) var result []map[string]interface{} getData(nil, recordId, "json", "export", &result) jsonData, err := json.MarshalIndent(result, "", " ") if err != nil { return err } fo, err := os.Create(filepath) if err != nil { return err } defer fo.Close() if _, err := fo.Write(jsonData); err != nil { return fmt.Errorf("failed to write data to file: %w", err) } return nil } func getData(file *excelize.File, recordId, exportStype, from string, result *[]map[string]interface{}) { // 将数据编码为JSON格式 param := map[string]interface{}{ "recordid": recordId, "from": from, } jsonData, err := json.Marshal(map[string]interface{}{"param": param}) if err != nil { qu.Debug(err) } // 创建一个HTTP POST请求 req, err := http.NewRequest("POST", fmt.Sprintf(serverAddress, "qlm/getData"), bytes.NewBuffer(jsonData)) if err != nil { qu.Debug("Error creating request:", err) } // 设置请求头,表明发送的是JSON数据 req.Header.Set("Content-Type", "application/json") // 发送HTTP请求并获取响应 client := &http.Client{} resp, err := client.Do(req) if err != nil { qu.Debug("Error making request:", err) } defer resp.Body.Close() // 检查响应状态码 if resp.StatusCode != http.StatusOK { qu.Debug("Error: server returned status:", resp.StatusCode) } // 创建一个bufio.Reader来逐行读取响应体(这里假设服务器发送的是逐条JSON对象) reader := bufio.NewReader(resp.Body) decoder := json.NewDecoder(reader) // 逐条读取并处理JSON数据 n := 0 index := 0 for { var tmp map[string]interface{} // 尝试解码下一条JSON数据 if err := decoder.Decode(&tmp); err != nil { // 检查是否是io.EOF错误,表示已经读取完所有数据 if err == io.EOF { break } // 对于其他错误,打印错误信息并退出 qu.Debug(err) } else { n++ index++ if exportStype == "excel" { indexStr := fmt.Sprint(index + 1) file.SetCellStr("Sheet1", "A"+indexStr, qu.ObjToString(tmp["_id"])) file.SetCellStr("Sheet1", "B"+indexStr, qu.ObjToString(tmp["title"])) file.SetCellStr("Sheet1", "C"+indexStr, qu.ObjToString(tmp["href"])) publishtime := qu.Int64All(tmp["publishtime"]) if publishtime == 0 { file.SetCellStr("Sheet1", "D"+indexStr, "") } else { file.SetCellStr("Sheet1", "D"+indexStr, qu.FormatDateByInt64(&publishtime, qu.Date_Full_Layout)) } repeatText := "" if repeat := tmp["repeat"]; repeat != nil { if repeatTmp, ok := repeat.(bool); ok && repeatTmp { repeatText = "重复" } else { repeatText = "不重复" } } file.SetCellStr("Sheet1", "E"+indexStr, repeatText) stateText := "未采集" state := qu.IntAll(tmp["state"]) if state == 1 { stateText = "采集成功" } else if state == -1 { stateText = "采集失败" } file.SetCellStr("Sheet1", "F"+indexStr, stateText) file.SetCellStr("Sheet1", "G"+indexStr, qu.ObjToString(tmp["username"])) pushstateText := "未推送" if qu.IntAll(tmp["pushstate"]) == 1 { pushstateText = "推送成功" } file.SetCellStr("Sheet1", "H"+indexStr, pushstateText) file.SetCellStr("Sheet1", "I"+indexStr, qu.ObjToString(tmp["detail"])) } else if exportStype == "json" { *result = append(*result, tmp) } } } qu.Debug(recordId, "共获取数据量:", n) } // 保存数据 func updateData() { for { select { case data := <-script.DataCache: r := &Result{} getResult(map[string]interface{}{"param": data}, r, "qlm/updateData") if r.Err == 1 { qu.Debug("保存成功:", data["href"], data["title"]) } else { qu.Debug("保存失败:", data["href"], data["title"]) } } } }