// 浏览器扩展能力 package browser import ( . "KeyWebsiteMonitor/spider/util" "context" "crypto/md5" "encoding/base64" "fmt" "io/ioutil" "net/http" "os" "strings" "time" "github.com/chromedp/cdproto/browser" "github.com/chromedp/cdproto/fetch" "github.com/chromedp/cdproto/page" "github.com/chromedp/chromedp" ) type ( Tab struct { Title string `json:"title"` Href string `json:"href"` } Tabs []*Tab ) // Screenshot func (b *Browser) Screenshot(tabTitle, tabUrl string, timeout int64, selectorType int, selector, save2file string) (err error) { ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout) if err != nil { return err } //defer fn() var res []byte var act chromedp.QueryAction switch selectorType { case selector_type_id: act = chromedp.Screenshot(selector, &res, chromedp.ByID) case selector_type_query: act = chromedp.Screenshot(selector, &res, chromedp.ByQuery) case selector_type_search: act = chromedp.Screenshot(selector, &res, chromedp.BySearch) case selector_type_jspath: act = chromedp.Screenshot(selector, &res, chromedp.ByJSPath) default: act = chromedp.Screenshot(selector, &res, chromedp.ByQueryAll) } err = chromedp.Run(ctx, act) if err != nil { return err } return os.WriteFile(save2file, res, 0777) } // PrintToPDF func (b *Browser) PrintToPDF(tabTitle, tabUrl string, timeout int64, save2file string) (err error) { ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout) if err != nil { return err } //defer fn() var res []byte var act chromedp.QueryAction = chromedp.ActionFunc(func(ctx context.Context) error { buf, _, err := page.PrintToPDF(). WithLandscape(false). WithPaperWidth(16.3). WithPaperHeight(11.69). WithMarginTop(0.1). WithMarginRight(0). WithMarginBottom(0.1). WithMarginLeft(0). WithPrintBackground(true). Do(ctx) // 通过cdp执行PrintToPDF if err != nil { return err } res = buf return nil }) err = chromedp.Run(ctx, act) if err != nil { return err } return os.WriteFile(save2file, res, 0777) } // GetBrowserTabs func (b *Browser) GetBrowserTabs(tabTitle, tabUrl string, timeout int64) ([]map[string]interface{}, error) { ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout) if err != nil { return nil, err } //defer fn() ts, err := chromedp.Targets(ctx) if err != nil { return nil, err } ret := make([]map[string]interface{}, 0, 0) for _, t := range ts { ret = append(ret, map[string]interface{}{ "title": t.Title, "url": t.URL, }) } return ret, nil } // DownloadFile 只有在非headless模式下有效,与click方法其实是一致的 func (b *Browser) DownloadFile(tabTitle, tabUrl string, timeout int64, selector string, selectorType int, filename string, save2dir string) error { defer Catch() ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout) if err != nil { return err } //defer fn() var act chromedp.QueryAction switch selectorType { case selector_type_id: act = chromedp.Click(selector, chromedp.ByID) case selector_type_query: act = chromedp.Click(selector, chromedp.ByQuery) case selector_type_search: act = chromedp.Click(selector, chromedp.BySearch) case selector_type_jspath: act = chromedp.Click(selector, chromedp.ByJSPath) default: act = chromedp.Click(selector, chromedp.ByQueryAll) } done := make(chan bool, 1) chromedp.ListenTarget(ctx, func(v interface{}) { switch ev := v.(type) { case *fetch.EventRequestPaused: if ev.ResponseStatusCode == 0 { go func() { if err := chromedp.Run(ctx, fetch.ContinueRequest(ev.RequestID).WithInterceptResponse(true), ); err != nil { fmt.Println(err.Error()) } }() } else { go func() { fulfill := fetch.FulfillRequest(ev.RequestID, ev.ResponseStatusCode) if ev.ResponseStatusCode == 200 { headers := append(ev.ResponseHeaders, &fetch.HeaderEntry{ Name: "Content-Disposition", Value: fmt.Sprintf("attachment; filename=%s", filename), }) fmt.Println("headers:") for k, v := range headers { fmt.Println(k, v.Name, v.Value) } fulfill = fulfill.WithResponseHeaders(headers) } if err := chromedp.Run(ctx, fulfill); err != nil { fmt.Println(err.Error()) } }() } case *browser.EventDownloadWillBegin: //开始下载文件 fmt.Println("start download file:", ev.SuggestedFilename) case *browser.EventDownloadProgress: //下载进度 if ev.State == browser.DownloadProgressStateCompleted { done <- true } } }) err = chromedp.Run(ctx, fetch.Enable().WithPatterns([]*fetch.RequestPattern{ {URLPattern: "*/*"}, }), browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).WithDownloadPath(save2dir).WithEventsEnabled(true), act) select { case <-done: return err case <-time.After(60 * time.Second): return err } return err } // GoHistoryBack func (b *Browser) GoHistoryBack(tabTitle, tabUrl string, timeout int64) error { ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout) if err != nil { return err } //defer fn() var act chromedp.QueryAction = chromedp.NavigateBack() return chromedp.Run(ctx, act) } // SendImage2ChatBot func SendImage2ChatBot(uri, img, mentioned string) error { rawStr := img[22:] bs, err := base64.StdEncoding.DecodeString(rawStr) if err != nil { return err } h := md5.New() h.Write(bs) hash := h.Sum(nil) hashStr := fmt.Sprintf("%x", hash) postBody := fmt.Sprintf(`{ "msgtype": "image", "image": { "base64": "%s", "md5": "%s", "mentioned_list":["@%s"] } } `, rawStr, strings.ToLower(hashStr), mentioned) client := new(http.Client) req, err := http.NewRequest("POST", uri, strings.NewReader(postBody)) if err != nil { return err } req.Header.Set("Content-Type", "application/json") resp, err := client.Do(req) if err != nil { return err } bs, _ = ioutil.ReadAll(resp.Body) resp.Body.Close() return nil } // SendText2ChatBot func SendText2ChatBot(uri, text, mentioned string) error { postBody := fmt.Sprintf(`{ "msgtype": "text", "text": { "content": "%s", "mentioned_list":["@%s"] } } `, text, mentioned) client := new(http.Client) req, err := http.NewRequest("POST", uri, strings.NewReader(postBody)) if err != nil { return err } req.Header.Set("Content-Type", "application/json") resp, err := client.Do(req) if err != nil { return err } _, _ = ioutil.ReadAll(resp.Body) resp.Body.Close() return nil } // Request func Request(method, href string, header map[string]interface{}, param map[string]interface{}, timeout int64) (error, string) { client := http.Client{ Timeout: time.Duration(timeout) * time.Second, } body := new(strings.Builder) for k, v := range param { body.WriteString(fmt.Sprintf("%s=%v&", k, v)) } req, err := http.NewRequest(method, href, strings.NewReader(body.String())) if err != nil { return err, "" } for k, v := range header { value, _ := v.(string) req.Header.Set(k, value) } resp, err := client.Do(req) if err != nil { return err, "" } bs, err := ioutil.ReadAll(resp.Body) if err != nil { return err, "" } resp.Body.Close() return nil, string(bs) }