123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299 |
// Package browser provides extended browser capabilities.
- package browser
import (
	"context"
	"crypto/md5"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"strings"
	"time"

	"github.com/chromedp/cdproto/browser"
	"github.com/chromedp/cdproto/fetch"
	"github.com/chromedp/cdproto/page"
	"github.com/chromedp/chromedp"

	. "KeyWebsiteMonitor/spider/util"
)
// Tab describes one browser tab by its title and address. Both fields are
// serialized to JSON under the lowercase keys "title" and "href".
type Tab struct {
	Title string `json:"title"`
	Href  string `json:"href"`
}

// Tabs is a list of pointers to Tab values.
type Tabs []*Tab
- // Screenshot
- func (b *Browser) Screenshot(tabTitle, tabUrl string, timeout int64,
- selectorType int, selector, save2file string) (err error) {
- ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- //defer fn()
- var res []byte
- var act chromedp.QueryAction
- switch selectorType {
- case selector_type_id:
- act = chromedp.Screenshot(selector, &res, chromedp.ByID)
- case selector_type_query:
- act = chromedp.Screenshot(selector, &res, chromedp.ByQuery)
- case selector_type_search:
- act = chromedp.Screenshot(selector, &res, chromedp.BySearch)
- case selector_type_jspath:
- act = chromedp.Screenshot(selector, &res, chromedp.ByJSPath)
- default:
- act = chromedp.Screenshot(selector, &res, chromedp.ByQueryAll)
- }
- err = chromedp.Run(ctx, act)
- if err != nil {
- return err
- }
- return os.WriteFile(save2file, res, 0777)
- }
- // PrintToPDF
- func (b *Browser) PrintToPDF(tabTitle, tabUrl string, timeout int64, save2file string) (err error) {
- ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- //defer fn()
- var res []byte
- var act chromedp.QueryAction = chromedp.ActionFunc(func(ctx context.Context) error {
- buf, _, err := page.PrintToPDF().
- WithLandscape(false).
- WithPaperWidth(16.3).
- WithPaperHeight(11.69).
- WithMarginTop(0.1).
- WithMarginRight(0).
- WithMarginBottom(0.1).
- WithMarginLeft(0).
- WithPrintBackground(true).
- Do(ctx) // 通过cdp执行PrintToPDF
- if err != nil {
- return err
- }
- res = buf
- return nil
- })
- err = chromedp.Run(ctx, act)
- if err != nil {
- return err
- }
- return os.WriteFile(save2file, res, 0777)
- }
- // GetBrowserTabs
- func (b *Browser) GetBrowserTabs(tabTitle, tabUrl string, timeout int64) ([]map[string]interface{}, error) {
- ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return nil, err
- }
- //defer fn()
- ts, err := chromedp.Targets(ctx)
- if err != nil {
- return nil, err
- }
- ret := make([]map[string]interface{}, 0, 0)
- for _, t := range ts {
- ret = append(ret, map[string]interface{}{
- "title": t.Title,
- "url": t.URL,
- })
- }
- return ret, nil
- }
- // DownloadFile 只有在非headless模式下有效,与click方法其实是一致的
- func (b *Browser) DownloadFile(tabTitle, tabUrl string, timeout int64, selector string,
- selectorType int, filename string, save2dir string) error {
- defer Catch()
- ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- //defer fn()
- var act chromedp.QueryAction
- switch selectorType {
- case selector_type_id:
- act = chromedp.Click(selector, chromedp.ByID)
- case selector_type_query:
- act = chromedp.Click(selector, chromedp.ByQuery)
- case selector_type_search:
- act = chromedp.Click(selector, chromedp.BySearch)
- case selector_type_jspath:
- act = chromedp.Click(selector, chromedp.ByJSPath)
- default:
- act = chromedp.Click(selector, chromedp.ByQueryAll)
- }
- done := make(chan bool, 1)
- chromedp.ListenTarget(ctx, func(v interface{}) {
- switch ev := v.(type) {
- case *fetch.EventRequestPaused:
- if ev.ResponseStatusCode == 0 {
- go func() {
- if err := chromedp.Run(ctx,
- fetch.ContinueRequest(ev.RequestID).WithInterceptResponse(true),
- ); err != nil {
- fmt.Println(err.Error())
- }
- }()
- } else {
- go func() {
- fulfill := fetch.FulfillRequest(ev.RequestID, ev.ResponseStatusCode)
- if ev.ResponseStatusCode == 200 {
- headers := append(ev.ResponseHeaders, &fetch.HeaderEntry{
- Name: "Content-Disposition",
- Value: fmt.Sprintf("attachment; filename=%s", filename),
- })
- fmt.Println("headers:")
- for k, v := range headers {
- fmt.Println(k, v.Name, v.Value)
- }
- fulfill = fulfill.WithResponseHeaders(headers)
- }
- if err := chromedp.Run(ctx, fulfill); err != nil {
- fmt.Println(err.Error())
- }
- }()
- }
- case *browser.EventDownloadWillBegin:
- //开始下载文件
- fmt.Println("start download file:", ev.SuggestedFilename)
- case *browser.EventDownloadProgress:
- //下载进度
- if ev.State == browser.DownloadProgressStateCompleted {
- done <- true
- }
- }
- })
- err = chromedp.Run(ctx,
- fetch.Enable().WithPatterns([]*fetch.RequestPattern{
- {URLPattern: "*/*"},
- }),
- browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).WithDownloadPath(save2dir).WithEventsEnabled(true),
- act)
- select {
- case <-done:
- return err
- case <-time.After(60 * time.Second):
- return err
- }
- return err
- }
- // GoHistoryBack
- func (b *Browser) GoHistoryBack(tabTitle, tabUrl string, timeout int64) error {
- ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout)
- if err != nil {
- return err
- }
- //defer fn()
- var act chromedp.QueryAction = chromedp.NavigateBack()
- return chromedp.Run(ctx,
- act)
- }
// SendImage2ChatBot posts an image message as JSON to the chat-bot webhook at
// uri.
//
// img is the base64 (standard alphabet) encoded image, optionally preceded by
// a data-URL header such as "data:image/png;base64,". Everything up to and
// including the first comma is dropped, so headers of any length are
// accepted; input without a comma is treated as bare base64. mentioned is
// sent as "@<mentioned>" in the message's "mentioned_list".
//
// The payload contains the base64 text and the lowercase hex MD5 of the
// decoded bytes. An error is returned when img is not valid base64, when the
// request cannot be built or sent, or when the webhook answers with a
// non-2xx status.
//
// NOTE(review): the payload shape looks like a WeCom group-robot message —
// confirm that "mentioned_list" is honoured inside "image".
func SendImage2ChatBot(uri, img, mentioned string) error {
	// Bound the call so an unresponsive webhook cannot block forever.
	const sendTimeout = 30 * time.Second

	type imageBody struct {
		Base64        string   `json:"base64"`
		MD5           string   `json:"md5"`
		MentionedList []string `json:"mentioned_list"`
	}
	type imageMessage struct {
		MsgType string    `json:"msgtype"`
		Image   imageBody `json:"image"`
	}

	// Strip an optional data-URL header. A comma never occurs in base64, so
	// the first comma (if any) ends the header.
	rawStr := img
	if i := strings.IndexByte(img, ','); i >= 0 {
		rawStr = img[i+1:]
	}
	bs, err := base64.StdEncoding.DecodeString(rawStr)
	if err != nil {
		return fmt.Errorf("decoding image: %w", err)
	}

	// Marshal instead of formatting by hand so that mentioned is escaped.
	payload, err := json.Marshal(imageMessage{
		MsgType: "image",
		Image: imageBody{
			Base64:        rawStr,
			MD5:           fmt.Sprintf("%x", md5.Sum(bs)),
			MentionedList: []string{"@" + mentioned},
		},
	})
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", uri, strings.NewReader(string(payload)))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{Timeout: sendTimeout}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Read the reply so the connection can be reused; a read failure after a
	// successful status is deliberately not reported as a send failure.
	reply, _ := ioutil.ReadAll(resp.Body)
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return fmt.Errorf("chat bot webhook returned %s: %s", resp.Status, reply)
	}
	return nil
}
// SendText2ChatBot posts a text message as JSON to the chat-bot webhook at
// uri.
//
// text becomes the message content and mentioned is sent as "@<mentioned>" in
// the message's "mentioned_list". Both values are JSON-escaped, so quotes,
// backslashes and newlines in them are transmitted intact.
//
// An error is returned when the request cannot be built or sent, or when the
// webhook answers with a non-2xx status.
//
// NOTE(review): the payload shape looks like a WeCom group-robot text message
// — confirm against the webhook actually in use.
func SendText2ChatBot(uri, text, mentioned string) error {
	// Bound the call so an unresponsive webhook cannot block forever.
	const sendTimeout = 30 * time.Second

	type textBody struct {
		Content       string   `json:"content"`
		MentionedList []string `json:"mentioned_list"`
	}
	type textMessage struct {
		MsgType string   `json:"msgtype"`
		Text    textBody `json:"text"`
	}

	// Marshal instead of formatting by hand: hand-built JSON became invalid
	// as soon as text contained a quote, backslash or line break.
	payload, err := json.Marshal(textMessage{
		MsgType: "text",
		Text: textBody{
			Content:       text,
			MentionedList: []string{"@" + mentioned},
		},
	})
	if err != nil {
		return err
	}

	req, err := http.NewRequest("POST", uri, strings.NewReader(string(payload)))
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{Timeout: sendTimeout}
	resp, err := client.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Read the reply so the connection can be reused; a read failure after a
	// successful status is deliberately not reported as a send failure.
	reply, _ := ioutil.ReadAll(resp.Body)
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return fmt.Errorf("chat bot webhook returned %s: %s", resp.Status, reply)
	}
	return nil
}
// Request performs a single HTTP request and returns the response body as a
// string.
//
// method and href are passed to http.NewRequest. Every entry of param is
// written to the request body as "key=value&" (values formatted with %v, in
// map iteration order, with a trailing "&"). Every entry of header is set on
// the request: string values are used as they are, nil becomes an empty
// value, and any other type is formatted with fmt.Sprint. timeout is the
// overall client timeout in seconds; 0 means no timeout.
//
// Note the unusual result order: the error comes first. On failure the string
// is empty. The HTTP status code is not inspected.
//
// NOTE(review): keys and values are not URL-escaped and no Content-Type is
// set here; callers that send form data must escape values and supply the
// header themselves — confirm against existing callers before changing.
func Request(method, href string, header map[string]interface{}, param map[string]interface{}, timeout int64) (error, string) {
	client := http.Client{
		Timeout: time.Duration(timeout) * time.Second,
	}

	var body strings.Builder
	for k, v := range param {
		fmt.Fprintf(&body, "%s=%v&", k, v)
	}

	req, err := http.NewRequest(method, href, strings.NewReader(body.String()))
	if err != nil {
		return err, ""
	}
	for k, v := range header {
		// Non-string values used to be sent as empty headers; format them
		// instead so numeric or boolean settings are not silently lost.
		var value string
		switch hv := v.(type) {
		case string:
			value = hv
		case nil:
			value = ""
		default:
			value = fmt.Sprint(hv)
		}
		req.Header.Set(k, value)
	}

	resp, err := client.Do(req)
	if err != nil {
		return err, ""
	}
	// Deferred so the body is also released when reading it fails.
	defer resp.Body.Close()

	bs, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err, ""
	}
	return nil, string(bs)
}
|