expandaction.go 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
// Package browser provides extended browser capabilities.
  2. package browser
import (
	"context"
	"crypto/md5"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"

	"github.com/chromedp/cdproto/browser"
	"github.com/chromedp/cdproto/fetch"
	"github.com/chromedp/cdproto/page"
	"github.com/chromedp/chromedp"

	. "KeyWebsiteMonitor/spider/util"
)
  19. type (
  20. Tab struct {
  21. Title string `json:"title"`
  22. Href string `json:"href"`
  23. }
  24. Tabs []*Tab
  25. )
  26. // Screenshot
  27. func (b *Browser) Screenshot(tabTitle, tabUrl string, timeout int64,
  28. selectorType int, selector, save2file string) (err error) {
  29. ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout)
  30. if err != nil {
  31. return err
  32. }
  33. //defer fn()
  34. var res []byte
  35. var act chromedp.QueryAction
  36. switch selectorType {
  37. case selector_type_id:
  38. act = chromedp.Screenshot(selector, &res, chromedp.ByID)
  39. case selector_type_query:
  40. act = chromedp.Screenshot(selector, &res, chromedp.ByQuery)
  41. case selector_type_search:
  42. act = chromedp.Screenshot(selector, &res, chromedp.BySearch)
  43. case selector_type_jspath:
  44. act = chromedp.Screenshot(selector, &res, chromedp.ByJSPath)
  45. default:
  46. act = chromedp.Screenshot(selector, &res, chromedp.ByQueryAll)
  47. }
  48. err = chromedp.Run(ctx, act)
  49. if err != nil {
  50. return err
  51. }
  52. return os.WriteFile(save2file, res, 0777)
  53. }
  54. // PrintToPDF
  55. func (b *Browser) PrintToPDF(tabTitle, tabUrl string, timeout int64, save2file string) (err error) {
  56. ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout)
  57. if err != nil {
  58. return err
  59. }
  60. //defer fn()
  61. var res []byte
  62. var act chromedp.QueryAction = chromedp.ActionFunc(func(ctx context.Context) error {
  63. buf, _, err := page.PrintToPDF().
  64. WithLandscape(false).
  65. WithPaperWidth(16.3).
  66. WithPaperHeight(11.69).
  67. WithMarginTop(0.1).
  68. WithMarginRight(0).
  69. WithMarginBottom(0.1).
  70. WithMarginLeft(0).
  71. WithPrintBackground(true).
  72. Do(ctx) // 通过cdp执行PrintToPDF
  73. if err != nil {
  74. return err
  75. }
  76. res = buf
  77. return nil
  78. })
  79. err = chromedp.Run(ctx, act)
  80. if err != nil {
  81. return err
  82. }
  83. return os.WriteFile(save2file, res, 0777)
  84. }
  85. // GetBrowserTabs
  86. func (b *Browser) GetBrowserTabs(tabTitle, tabUrl string, timeout int64) ([]map[string]interface{}, error) {
  87. ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout)
  88. if err != nil {
  89. return nil, err
  90. }
  91. //defer fn()
  92. ts, err := chromedp.Targets(ctx)
  93. if err != nil {
  94. return nil, err
  95. }
  96. ret := make([]map[string]interface{}, 0, 0)
  97. for _, t := range ts {
  98. ret = append(ret, map[string]interface{}{
  99. "title": t.Title,
  100. "url": t.URL,
  101. })
  102. }
  103. return ret, nil
  104. }
  105. // DownloadFile 只有在非headless模式下有效,与click方法其实是一致的
  106. func (b *Browser) DownloadFile(tabTitle, tabUrl string, timeout int64, selector string,
  107. selectorType int, filename string, save2dir string) error {
  108. defer Catch()
  109. ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout)
  110. if err != nil {
  111. return err
  112. }
  113. //defer fn()
  114. var act chromedp.QueryAction
  115. switch selectorType {
  116. case selector_type_id:
  117. act = chromedp.Click(selector, chromedp.ByID)
  118. case selector_type_query:
  119. act = chromedp.Click(selector, chromedp.ByQuery)
  120. case selector_type_search:
  121. act = chromedp.Click(selector, chromedp.BySearch)
  122. case selector_type_jspath:
  123. act = chromedp.Click(selector, chromedp.ByJSPath)
  124. default:
  125. act = chromedp.Click(selector, chromedp.ByQueryAll)
  126. }
  127. done := make(chan bool, 1)
  128. chromedp.ListenTarget(ctx, func(v interface{}) {
  129. switch ev := v.(type) {
  130. case *fetch.EventRequestPaused:
  131. if ev.ResponseStatusCode == 0 {
  132. go func() {
  133. if err := chromedp.Run(ctx,
  134. fetch.ContinueRequest(ev.RequestID).WithInterceptResponse(true),
  135. ); err != nil {
  136. fmt.Println(err.Error())
  137. }
  138. }()
  139. } else {
  140. go func() {
  141. fulfill := fetch.FulfillRequest(ev.RequestID, ev.ResponseStatusCode)
  142. if ev.ResponseStatusCode == 200 {
  143. headers := append(ev.ResponseHeaders, &fetch.HeaderEntry{
  144. Name: "Content-Disposition",
  145. Value: fmt.Sprintf("attachment; filename=%s", filename),
  146. })
  147. fmt.Println("headers:")
  148. for k, v := range headers {
  149. fmt.Println(k, v.Name, v.Value)
  150. }
  151. fulfill = fulfill.WithResponseHeaders(headers)
  152. }
  153. if err := chromedp.Run(ctx, fulfill); err != nil {
  154. fmt.Println(err.Error())
  155. }
  156. }()
  157. }
  158. case *browser.EventDownloadWillBegin:
  159. //开始下载文件
  160. fmt.Println("start download file:", ev.SuggestedFilename)
  161. case *browser.EventDownloadProgress:
  162. //下载进度
  163. if ev.State == browser.DownloadProgressStateCompleted {
  164. done <- true
  165. }
  166. }
  167. })
  168. err = chromedp.Run(ctx,
  169. fetch.Enable().WithPatterns([]*fetch.RequestPattern{
  170. {URLPattern: "*/*"},
  171. }),
  172. browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).WithDownloadPath(save2dir).WithEventsEnabled(true),
  173. act)
  174. select {
  175. case <-done:
  176. return err
  177. case <-time.After(60 * time.Second):
  178. return err
  179. }
  180. return err
  181. }
  182. // GoHistoryBack
  183. func (b *Browser) GoHistoryBack(tabTitle, tabUrl string, timeout int64) error {
  184. ctx, _, err := b.findTabContext(tabTitle, tabUrl, timeout)
  185. if err != nil {
  186. return err
  187. }
  188. //defer fn()
  189. var act chromedp.QueryAction = chromedp.NavigateBack()
  190. return chromedp.Run(ctx,
  191. act)
  192. }
  193. // SendImage2ChatBot
  194. func SendImage2ChatBot(uri, img, mentioned string) error {
  195. rawStr := img[22:]
  196. bs, err := base64.StdEncoding.DecodeString(rawStr)
  197. if err != nil {
  198. return err
  199. }
  200. h := md5.New()
  201. h.Write(bs)
  202. hash := h.Sum(nil)
  203. hashStr := fmt.Sprintf("%x", hash)
  204. postBody := fmt.Sprintf(`{
  205. "msgtype": "image",
  206. "image": {
  207. "base64": "%s",
  208. "md5": "%s",
  209. "mentioned_list":["@%s"]
  210. }
  211. }
  212. `, rawStr, strings.ToLower(hashStr), mentioned)
  213. client := new(http.Client)
  214. req, err := http.NewRequest("POST", uri,
  215. strings.NewReader(postBody))
  216. if err != nil {
  217. return err
  218. }
  219. req.Header.Set("Content-Type", "application/json")
  220. resp, err := client.Do(req)
  221. if err != nil {
  222. return err
  223. }
  224. bs, _ = ioutil.ReadAll(resp.Body)
  225. resp.Body.Close()
  226. return nil
  227. }
  228. // SendText2ChatBot
  229. func SendText2ChatBot(uri, text, mentioned string) error {
  230. postBody := fmt.Sprintf(`{
  231. "msgtype": "text",
  232. "text": {
  233. "content": "%s",
  234. "mentioned_list":["@%s"]
  235. }
  236. }
  237. `, text, mentioned)
  238. client := new(http.Client)
  239. req, err := http.NewRequest("POST", uri,
  240. strings.NewReader(postBody))
  241. if err != nil {
  242. return err
  243. }
  244. req.Header.Set("Content-Type", "application/json")
  245. resp, err := client.Do(req)
  246. if err != nil {
  247. return err
  248. }
  249. _, _ = ioutil.ReadAll(resp.Body)
  250. resp.Body.Close()
  251. return nil
  252. }
  253. // Request
  254. func Request(method, href string, header map[string]interface{}, param map[string]interface{}, timeout int64) (error, string) {
  255. client := http.Client{
  256. Timeout: time.Duration(timeout) * time.Second,
  257. }
  258. body := new(strings.Builder)
  259. for k, v := range param {
  260. body.WriteString(fmt.Sprintf("%s=%v&", k, v))
  261. }
  262. req, err := http.NewRequest(method, href, strings.NewReader(body.String()))
  263. if err != nil {
  264. return err, ""
  265. }
  266. for k, v := range header {
  267. value, _ := v.(string)
  268. req.Header.Set(k, value)
  269. }
  270. resp, err := client.Do(req)
  271. if err != nil {
  272. return err, ""
  273. }
  274. bs, err := ioutil.ReadAll(resp.Body)
  275. if err != nil {
  276. return err, ""
  277. }
  278. resp.Body.Close()
  279. return nil, string(bs)
  280. }