webservice.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. // 对外服务
  2. package webservice
  3. import (
  4. "bytes"
  5. "crypto/tls"
  6. _ "embed"
  7. "encoding/json"
  8. "fmt"
  9. "io/ioutil"
  10. "log"
  11. "net/http"
  12. be "spider_creator/backend"
  13. bdb "spider_creator/backend/db"
  14. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  15. )
  16. const (
  17. LISTEN_ADDR = ":8080"
  18. AI_GEN_LIST_CSS = "http://182.92.66.70:5005/listpage"
  19. AI_GEN_DETAIL_CSS = "http://182.92.66.70:5005/detailpage"
  20. )
  21. type (
  22. SpiderConfigItem struct {
  23. Key string `json:"key"`
  24. Css string `json:"css"`
  25. Url string `json:"url"`
  26. }
  27. WebService struct {
  28. db *bdb.SpiderDb
  29. enf be.EventNotifyFace
  30. currentSpiderConfig *be.SpiderConfig
  31. }
  32. )
  33. var (
  34. //go:embed cert.pem
  35. certBytes []byte
  36. //go:embed key.pem
  37. keyBytes []byte
  38. currentTabSpiderConfig = &be.SpiderConfig{}
  39. )
  40. // NewWebService
  41. func NewWebService(db *bdb.SpiderDb, enf be.EventNotifyFace, csf *be.SpiderConfig) *WebService {
  42. return &WebService{db, enf, csf}
  43. }
  44. func (ws *WebService) RunHttpServe() {
  45. // 设置HTTP服务器
  46. mux := http.NewServeMux()
  47. // 解析证书
  48. cert, err := tls.X509KeyPair(certBytes, keyBytes)
  49. if err != nil {
  50. qu.Debug(err.Error())
  51. return
  52. }
  53. // 创建一个TLS配置
  54. tlsConfig := &tls.Config{
  55. // 可以在这里添加其他TLS配置
  56. Certificates: []tls.Certificate{cert},
  57. ServerName: "localhost",
  58. InsecureSkipVerify: true,
  59. }
  60. server := &http.Server{
  61. Addr: LISTEN_ADDR,
  62. Handler: mux,
  63. TLSConfig: tlsConfig,
  64. }
  65. //这里注册HTTP服务
  66. mux.HandleFunc("/save", ws.SaveSpiderConfig)
  67. mux.HandleFunc("/load", ws.LoadSpiderConfig)
  68. mux.HandleFunc("/loadListCss", ws.FindListCssSelector)
  69. mux.HandleFunc("/loadDetailCss", ws.FindDetailCssSelector)
  70. //
  71. qu.Debug("Starting HTTPS server on ", LISTEN_ADDR)
  72. err = server.ListenAndServeTLS("", "")
  73. if err != nil {
  74. qu.Debug("Failed to start server: ", err.Error())
  75. return
  76. }
  77. }
  78. // SaveSpiderConfig LoadCurrentSpiderConfig,json处理
  79. func (ws *WebService) SaveSpiderConfig(w http.ResponseWriter, r *http.Request) {
  80. qu.Debug("保存设置")
  81. w.Header().Set("Access-Control-Allow-Origin", "*")
  82. w.Header().Set("Content-Type", "application/json")
  83. var req = new(SpiderConfigItem)
  84. err := json.NewDecoder(r.Body).Decode(req)
  85. if err != nil {
  86. qu.Debug("序列化失败")
  87. http.Error(w, err.Error(), http.StatusBadRequest)
  88. return
  89. }
  90. switch req.Key {
  91. case "listItemCss":
  92. currentTabSpiderConfig.ListItemCss = req.Css
  93. case "listLinkCss":
  94. currentTabSpiderConfig.ListLinkCss = req.Css
  95. case "listPublishTimeCss":
  96. currentTabSpiderConfig.ListPubtimeCss = req.Css
  97. case "listNextPageCss":
  98. currentTabSpiderConfig.ListNextPageCss = req.Css
  99. case "listBodyCss":
  100. currentTabSpiderConfig.ListBodyCss = req.Css
  101. case "titleCss":
  102. currentTabSpiderConfig.TitleCss = req.Css
  103. case "publishUnitCss":
  104. currentTabSpiderConfig.PublishUnitCss = req.Css
  105. case "publishTimeCss":
  106. currentTabSpiderConfig.PublishTimeCss = req.Css
  107. case "contentCss":
  108. currentTabSpiderConfig.ContentCss = req.Css
  109. case "attachCss":
  110. currentTabSpiderConfig.AttachCss = req.Css
  111. }
  112. qu.Debug("CSS", req.Key, req.Css, req.Url)
  113. fmt.Fprint(w, "{'code':200}")
  114. //TODO 通知开发工具端,CSS选择器有变动
  115. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": req.Key, "css": req.Css, "url": req.Url})
  116. }
  117. // LoadSpiderConfig LoadCurrentSpiderConfig,加载,返回当前配置项
  118. func (ws *WebService) LoadSpiderConfig(w http.ResponseWriter, r *http.Request) {
  119. qu.Debug("加载当前配置项")
  120. w.Header().Set("Access-Control-Allow-Origin", "*")
  121. w.Header().Set("Content-Type", "application/json")
  122. var req = new(SpiderConfigItem)
  123. err := json.NewDecoder(r.Body).Decode(req)
  124. if err != nil {
  125. qu.Debug("序列化失败")
  126. http.Error(w, err.Error(), http.StatusBadRequest)
  127. return
  128. } else {
  129. qu.Debug("高亮:", req.Url, *currentTabSpiderConfig)
  130. err = json.NewEncoder(w).Encode(currentTabSpiderConfig)
  131. if err != nil {
  132. log.Println("反向序列化失败")
  133. http.Error(w, err.Error(), http.StatusBadRequest)
  134. return
  135. }
  136. }
  137. }
  138. func SetCurrentTabCssMark(cssMark map[string]interface{}) {
  139. sc, err := be.NewSpiderConfig(cssMark)
  140. if err != nil {
  141. qu.Debug("标注信息传输失败!")
  142. }
  143. currentTabSpiderConfig = sc
  144. //qu.Debug("当前编辑爬虫链接:", *currentTabSpiderConfig)
  145. }
  146. // FindListCssSelector
  147. func (ws *WebService) FindListCssSelector(w http.ResponseWriter, r *http.Request) {
  148. qu.Debug("AI生成列表页CSS...")
  149. w.Header().Set("Access-Control-Allow-Origin", "*")
  150. w.Header().Set("Content-Type", "application/json")
  151. reqData := struct {
  152. Data string `json:"data"`
  153. User string `json:"user"`
  154. }{}
  155. err := json.NewDecoder(r.Body).Decode(&reqData)
  156. if err != nil {
  157. return
  158. }
  159. //发起远程请求,进行大模型计算
  160. reqData.User = "jianyu"
  161. jsonData, err := json.Marshal(reqData)
  162. if err != nil {
  163. fmt.Println("Error marshalling data:", err)
  164. return
  165. }
  166. // 创建HTTP客户端
  167. client := &http.Client{}
  168. // 创建POST请求
  169. req, err := http.NewRequest("POST", AI_GEN_LIST_CSS, bytes.NewBuffer(jsonData))
  170. if err != nil {
  171. fmt.Println("Error creating request:", err)
  172. return
  173. }
  174. // 设置请求头
  175. req.Header.Set("Content-Type", "application/json")
  176. // 发送请求
  177. resp, err := client.Do(req)
  178. if err != nil {
  179. fmt.Println("Error sending request:", err)
  180. return
  181. }
  182. defer resp.Body.Close()
  183. // 读取响应
  184. body, err := ioutil.ReadAll(resp.Body)
  185. if err != nil {
  186. fmt.Println("Error reading response:", err)
  187. return
  188. }
  189. log.Println("AI返回", string(body))
  190. resultCss := struct {
  191. Data struct {
  192. ListBodyCss string `json:"listBodyCss"`
  193. ListItemCss string `json:"listItemCss"`
  194. ListLinkCss string `json:"listLinkCss"`
  195. ListPublishTimeCss string `json:"listPublishTimeCss"`
  196. ListNextPageCss string `json:"listNextPageCss"`
  197. listTurnPageJs string `json:"listTurnPageJs"`
  198. } `json:"data"`
  199. }{}
  200. err = json.Unmarshal(body, &resultCss)
  201. if err != nil {
  202. fmt.Println("Error decodejson response:", err, string(body))
  203. return
  204. }
  205. //TODO 通知开发工具端,CSS选择器有变动
  206. currentTabSpiderConfig.ListBodyCss = resultCss.Data.ListBodyCss
  207. currentTabSpiderConfig.ListItemCss = resultCss.Data.ListItemCss
  208. currentTabSpiderConfig.ListLinkCss = resultCss.Data.ListLinkCss
  209. currentTabSpiderConfig.ListNextPageCss = resultCss.Data.ListNextPageCss
  210. currentTabSpiderConfig.ListPubtimeCss = resultCss.Data.ListPublishTimeCss
  211. currentTabSpiderConfig.ListTurnPageJSCode = resultCss.Data.listTurnPageJs
  212. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "listBodyCss", "css": resultCss.Data.ListBodyCss})
  213. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "listItemCss", "css": resultCss.Data.ListItemCss})
  214. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "listLinkCss", "css": resultCss.Data.ListLinkCss})
  215. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "listPublishTimeCss", "css": resultCss.Data.ListPublishTimeCss})
  216. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "listNextPageCss", "css": resultCss.Data.ListNextPageCss})
  217. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "listTurnPageJs", "css": resultCss.Data.listTurnPageJs})
  218. err = json.NewEncoder(w).Encode(currentTabSpiderConfig)
  219. if err != nil {
  220. log.Println("反向序列化失败")
  221. http.Error(w, err.Error(), http.StatusBadRequest)
  222. return
  223. }
  224. }
  225. // FindDetailCssSelector
  226. func (ws *WebService) FindDetailCssSelector(w http.ResponseWriter, r *http.Request) {
  227. qu.Debug("AI生成详情页CSS...")
  228. w.Header().Set("Access-Control-Allow-Origin", "*")
  229. w.Header().Set("Content-Type", "application/json")
  230. reqData := struct {
  231. Data string `json:"data"`
  232. User string `json:"user"`
  233. }{}
  234. err := json.NewDecoder(r.Body).Decode(&reqData)
  235. if err != nil {
  236. return
  237. }
  238. //发起远程请求,进行大模型计算
  239. reqData.User = "jianyu"
  240. jsonData, err := json.Marshal(reqData)
  241. if err != nil {
  242. fmt.Println("Error marshalling data:", err)
  243. return
  244. }
  245. // 创建HTTP客户端
  246. client := &http.Client{}
  247. // 创建POST请求
  248. req, err := http.NewRequest("POST", AI_GEN_DETAIL_CSS, bytes.NewBuffer(jsonData))
  249. if err != nil {
  250. fmt.Println("Error creating request:", err)
  251. return
  252. }
  253. // 设置请求头
  254. req.Header.Set("Content-Type", "application/json")
  255. // 发送请求
  256. resp, err := client.Do(req)
  257. if err != nil {
  258. fmt.Println("Error sending request:", err)
  259. return
  260. }
  261. defer resp.Body.Close()
  262. // 读取响应
  263. body, err := ioutil.ReadAll(resp.Body)
  264. if err != nil {
  265. fmt.Println("Error reading response:", err)
  266. return
  267. }
  268. log.Println("AI返回", string(body))
  269. resultCss := struct {
  270. Data struct {
  271. TitleCss string `json:"titleCss"`
  272. ContentCss string `json:"contentCss"`
  273. PublishTimeCss string `json:"publishTimeCss"`
  274. PublishUnitCss string `json:"publishUnitCss"`
  275. AttachCss string `json:"attachCss"`
  276. } `json:"data"`
  277. }{}
  278. err = json.Unmarshal(body, &resultCss)
  279. if err != nil {
  280. fmt.Println("Error decodejson response:", err, string(body))
  281. return
  282. }
  283. //TODO 通知开发工具端,CSS选择器有变动
  284. currentTabSpiderConfig.TitleCss = resultCss.Data.TitleCss
  285. currentTabSpiderConfig.PublishTimeCss = resultCss.Data.PublishTimeCss
  286. currentTabSpiderConfig.PublishUnitCss = resultCss.Data.PublishUnitCss
  287. currentTabSpiderConfig.ContentCss = resultCss.Data.ContentCss
  288. currentTabSpiderConfig.AttachCss = resultCss.Data.AttachCss
  289. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "titleCss",
  290. "css": resultCss.Data.TitleCss})
  291. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "contentCss",
  292. "css": resultCss.Data.ContentCss})
  293. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "publishTimeCss",
  294. "css": resultCss.Data.PublishTimeCss})
  295. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "publishUnitCss",
  296. "css": resultCss.Data.PublishUnitCss})
  297. ws.enf.Dispatch("spiderConfigChange", map[string]interface{}{"key": "attachCss",
  298. "css": resultCss.Data.AttachCss})
  299. err = json.NewEncoder(w).Encode(currentTabSpiderConfig)
  300. if err != nil {
  301. log.Println("反向序列化失败")
  302. http.Error(w, err.Error(), http.StatusBadRequest)
  303. return
  304. }
  305. }