script.go 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999
  1. /*
  2. *
  3. 脚本加载+调用 封装,
  4. 前期走文件系统加载
  5. 后期走数据库配置,
  6. LUA中公共的方法需要抽出来,主脚本文件加载LUA公共文件
  7. */
  8. package spider
  9. import (
  10. codegrpc "analysiscode/client"
  11. "bytes"
  12. "compress/gzip"
  13. "encoding/base64"
  14. "encoding/json"
  15. "fmt"
  16. "github.com/shopspring/decimal"
  17. gojs "gorunjs/client"
  18. "io"
  19. "io/ioutil"
  20. login "login/client"
  21. mu "mfw/util"
  22. "mime/multipart"
  23. "net/http"
  24. "net/url"
  25. "path"
  26. qu "qfw/util"
  27. "regexp"
  28. sp "spiderutil"
  29. "strconv"
  30. "strings"
  31. "time"
  32. "golang.org/x/text/encoding/simplifiedchinese"
  33. "golang.org/x/text/transform"
  34. "github.com/cjoudrey/gluahttp"
  35. lujson "github.com/yuin/gopher-json"
  36. "github.com/yuin/gopher-lua"
  37. )
  38. // 脚本
  39. type Script struct {
  40. SCode, ScriptFile string
  41. Encoding string
  42. Downloader string //下载器
  43. Timeout int64 //超时时间秒
  44. L *lua.LState
  45. Test_luareqcount int //脚本请求次数
  46. Test_goreqtime int //go发起次数(时间)
  47. Test_goreqlist int //go发起次数(列表)
  48. Test_goreqcon int //go发起次数(正文)
  49. }
  50. // 加载文件
  51. func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ...string) {
  52. s.ScriptFile = script
  53. options := lua.Options{
  54. RegistrySize: 256 * 20,
  55. CallStackSize: 256,
  56. IncludeGoStackTrace: false,
  57. }
  58. s.L = lua.NewState(options)
  59. //s.L.ScriptFileName = s.SCode
  60. s.L.PreloadModule("http", gluahttp.NewHttpModule(&http.Client{}).Loader)
  61. s.L.PreloadModule("json", lujson.Loader)
  62. if len(isfile) > 0 {
  63. if err := s.L.DoFile(script); err != nil {
  64. panic("加载lua脚本错误" + err.Error())
  65. }
  66. } else {
  67. if err := s.L.DoString(script); err != nil {
  68. panic("加载lua脚本错误" + err.Error())
  69. }
  70. }
  71. s.Encoding = s.GetVar("spiderPageEncoding")
  72. //暴露go方法
  73. //download(url,head) 普通下载
  74. s.L.SetGlobal("download", s.L.NewFunction(func(S *lua.LState) int {
  75. head := S.ToTable(-1)
  76. url := S.ToString(-2)
  77. ishttps := S.ToBool(-3)
  78. charset := S.ToString(-4)
  79. if charset == "" {
  80. charset = s.Encoding
  81. }
  82. ret := Download(downloadnode, s.Downloader, url, "get", sp.GetTable(head), charset, false, ishttps, "", s.Timeout)
  83. S.Push(lua.LString(ret))
  84. s.Test_luareqcount++
  85. return 1
  86. }))
  87. s.L.SetGlobal("findContentText", s.L.NewFunction(func(S *lua.LState) int {
  88. gpath := S.ToString(-2)
  89. content := S.ToString(-1)
  90. ret := sp.FindContentText(gpath, content)
  91. S.Push(ret)
  92. return 1
  93. }))
  94. //高级下载download(url,method,param,head,cookie)
  95. s.L.SetGlobal("downloadAdv", s.L.NewFunction(func(S *lua.LState) int {
  96. cookie := S.ToString(-1)
  97. head := S.ToTable(-2)
  98. param := S.ToTable(-3)
  99. method := S.ToString(-4)
  100. url := S.ToString(-5)
  101. ishttps := S.ToBool(-6)
  102. charset := S.ToString(-7)
  103. if charset == "" {
  104. charset = s.Encoding
  105. }
  106. var mycookie []*http.Cookie
  107. json.Unmarshal([]byte(cookie), &mycookie)
  108. var ret string
  109. var retcookie []*http.Cookie
  110. var headers = map[string]interface{}{}
  111. if param == nil {
  112. ptext := map[string]interface{}{"text": S.ToString(-3)}
  113. ret, retcookie, headers = DownloadAdv(downloadnode, s.Downloader, url, method, ptext, sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
  114. } else {
  115. ret, retcookie, headers = DownloadAdv(downloadnode, s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
  116. }
  117. S.Push(lua.LString(ret))
  118. scookie, _ := json.Marshal(retcookie)
  119. S.Push(lua.LString(scookie))
  120. hTable := sp.MapToLuaTable(S, headers)
  121. S.Push(hTable)
  122. s.Test_luareqcount++
  123. return 3
  124. }))
  125. s.L.SetGlobal("downloadAdvNew", s.L.NewFunction(func(S *lua.LState) int {
  126. cookie := S.ToString(-1)
  127. head := S.ToTable(-2)
  128. param := S.ToTable(-3)
  129. method := S.ToString(-4)
  130. url := S.ToString(-5)
  131. ishttps := S.ToBool(-6)
  132. charset := S.ToString(-7)
  133. if charset == "" {
  134. charset = s.Encoding
  135. }
  136. var mycookie []*http.Cookie
  137. json.Unmarshal([]byte(cookie), &mycookie)
  138. var ret string
  139. var retcookie []*http.Cookie
  140. var headers = map[string]interface{}{}
  141. if param == nil {
  142. ptext := map[string]interface{}{"text": S.ToString(-3)}
  143. ret, retcookie, headers = DownloadAdvNew(downloadnode, s.Downloader, url, method, ptext, sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
  144. } else {
  145. ret, retcookie, headers = DownloadAdvNew(downloadnode, s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
  146. }
  147. S.Push(lua.LString(ret))
  148. scookie, _ := json.Marshal(retcookie)
  149. S.Push(lua.LString(scookie))
  150. hTable := sp.MapToLuaTable(S, headers)
  151. S.Push(hTable)
  152. s.Test_luareqcount++
  153. return 3
  154. }))
  155. s.L.SetGlobal("findOneText", s.L.NewFunction(func(S *lua.LState) int {
  156. nodetype := S.ToString(-3)
  157. gpath := S.ToString(-2)
  158. content := S.ToString(-1)
  159. ret := sp.FindOneText(gpath, content, nodetype)
  160. S.Push(ret)
  161. return 1
  162. }))
  163. s.L.SetGlobal("findOneHtml", s.L.NewFunction(func(S *lua.LState) int {
  164. nodetype := S.ToString(-3)
  165. gpath := S.ToString(-2)
  166. content := S.ToString(-1)
  167. ret := sp.FindOneHtml(gpath, content, nodetype)
  168. S.Push(ret)
  169. return 1
  170. }))
  171. s.L.SetGlobal("findListText", s.L.NewFunction(func(S *lua.LState) int {
  172. gpath := S.ToString(-2)
  173. content := S.ToString(-1)
  174. ret := s.L.NewTable()
  175. sp.FindListText(gpath, content, ret)
  176. S.Push(ret)
  177. return 1
  178. }))
  179. s.L.SetGlobal("findListHtml", s.L.NewFunction(func(S *lua.LState) int {
  180. gpath := S.ToString(-2)
  181. content := S.ToString(-1)
  182. ret := s.L.NewTable()
  183. sp.FindListHtml(gpath, content, ret)
  184. S.Push(ret)
  185. return 1
  186. }))
  187. //推送列表页下载数据量
  188. s.L.SetGlobal("sendListNum", s.L.NewFunction(func(S *lua.LState) int {
  189. pageno := S.ToInt(-3) //当前页
  190. index := S.ToInt(-2) //第几条数据
  191. table := S.ToTable(-1)
  192. qu.Debug(s.SCode, index, table.Len())
  193. if index == 1 {
  194. if pageno == 1 { //第一页数据
  195. oneMap := sp.TableToMap(table)
  196. text, _ := json.Marshal(oneMap)
  197. hashText := sp.HexTextByte(text)
  198. qu.Debug("第一页:", hashText)
  199. } else if pageno == 2 { //第一页数据
  200. twoMap := sp.TableToMap(table)
  201. text, _ := json.Marshal(twoMap)
  202. hashText := sp.HexTextByte(text)
  203. qu.Debug("第二页:", hashText)
  204. }
  205. }
  206. return 1
  207. }))
  208. s.L.SetGlobal("findMap", s.L.NewFunction(func(S *lua.LState) int {
  209. qmap := S.ToTable(-2)
  210. content := S.ToString(-1)
  211. ret := s.L.NewTable()
  212. sp.FindMap(qmap, content, ret)
  213. S.Push(ret)
  214. return 1
  215. }))
  216. //调用jsvm
  217. s.L.SetGlobal("jsvm", s.L.NewFunction(func(S *lua.LState) int {
  218. js := S.ToString(-1)
  219. ret := s.L.NewTable()
  220. if js == "" {
  221. ret.RawSet(lua.LString("val"), lua.LString(""))
  222. ret.RawSet(lua.LString("err"), lua.LString("js is null"))
  223. } else {
  224. rep := sp.JsVmPost(sp.Config.JsVmUrl, js)
  225. ret.RawSet(lua.LString("val"), lua.LString(qu.ObjToString(rep["val"])))
  226. ret.RawSet(lua.LString("err"), lua.LString(qu.ObjToString(rep["err"])))
  227. }
  228. S.Push(ret)
  229. return 1
  230. }))
  231. //指定下载器
  232. s.L.SetGlobal("changeDownloader", s.L.NewFunction(func(S *lua.LState) int {
  233. s.Downloader = GetOneDownloader()
  234. S.Push(lua.LString(s.Downloader))
  235. return 1
  236. }))
  237. //手工延时
  238. s.L.SetGlobal("timeSleep", s.L.NewFunction(func(S *lua.LState) int {
  239. time.Sleep(1 * time.Second)
  240. return 0
  241. }))
  242. s.L.SetGlobal("runSleep", s.L.NewFunction(func(S *lua.LState) int {
  243. t := S.ToInt(-1)
  244. if t <= 0 {
  245. t = 1
  246. }
  247. time.Sleep(time.Duration(t) * time.Second)
  248. return 0
  249. }))
  250. //编码解码
  251. s.L.SetGlobal("transCode", s.L.NewFunction(func(S *lua.LState) int {
  252. codeType := strings.ToLower(S.ToString(-2))
  253. str := S.CheckString(-1)
  254. switch codeType {
  255. case "unicode":
  256. str = strings.Replace(str, "%u", "\\u", -1)
  257. str = transUnic(str)
  258. case "urlencode_gbk":
  259. data, _ := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(str)), simplifiedchinese.GBK.NewEncoder()))
  260. l, _ := url.Parse("http://a.com/?" + string(data))
  261. tmpstr := l.Query().Encode()
  262. if len(tmpstr) > 1 {
  263. str = tmpstr[0 : len(tmpstr)-1]
  264. } else {
  265. str = ""
  266. }
  267. case "urlencode_utf8":
  268. l, _ := url.Parse("http://a.com/?" + str)
  269. tmpstr := l.Query().Encode()
  270. if len(tmpstr) > 1 {
  271. str = tmpstr[0 : len(tmpstr)-1]
  272. } else {
  273. str = ""
  274. }
  275. case "urldecode_utf8":
  276. str, _ = url.QueryUnescape(str)
  277. case "decode64":
  278. str = sp.DecodeB64(str)
  279. case "encodemd5":
  280. str = qu.GetMd5String(str)
  281. case "htmldecode": //html实体码
  282. //txt := `<div align="left" style="margin-left: 0pt;"><span style='font-family:; font-size:13px; color:#000000'>&#22826;&#38451;&#23707;&#29305;&#21220;&#28040;&#38450;&#31449;&#12289;&#26494;&#28006;&#29305;&#21220;&#28040;&#38450;&#31449;&#24314;&#35774;&#39033;&#30446;&#35774;&#35745;&#20013;&#26631;&#20844;&#31034;</span></div>`
  283. str = S.ToString(-1)
  284. reg, _ := regexp.Compile("&#\\d+;")
  285. str = reg.ReplaceAllStringFunc(str, func(src string) string {
  286. v, _ := strconv.Atoi(src[2 : len(src)-1])
  287. return string(rune(v))
  288. })
  289. }
  290. S.Push(lua.LString(str))
  291. return 1
  292. }))
  293. //保存错误日志
  294. s.L.SetGlobal("saveErrLog", s.L.NewFunction(func(S *lua.LState) int {
  295. return 0
  296. }))
  297. //添加改版日志
  298. s.L.SetGlobal("saveRevisionLog", s.L.NewFunction(func(S *lua.LState) int {
  299. return 0
  300. }))
  301. //如果服务端返回的html是gzip压缩过格式的 这里需要转一下
  302. s.L.SetGlobal("unGzip", s.L.NewFunction(func(S *lua.LState) int {
  303. html := S.ToString(-1)
  304. bs := []byte(html)
  305. gzipreader, _ := gzip.NewReader(bytes.NewReader(bs))
  306. bs, _ = ioutil.ReadAll(gzipreader)
  307. S.Push(lua.LString(bs))
  308. return 1
  309. }))
  310. s.L.SetGlobal("titleRepeatJudgement", s.L.NewFunction(func(S *lua.LState) int {
  311. bResult := false
  312. S.Push(lua.LBool(bResult))
  313. return 1
  314. }))
  315. //解析附件中的word、pdf
  316. s.L.SetGlobal("officeAnalysis", s.L.NewFunction(func(S *lua.LState) int {
  317. ext := map[string]byte{"pdf": byte(0), "doc": byte(1), "docx": byte(2)}
  318. str := S.ToString(-2)
  319. extension := S.ToString(-1)
  320. bs, _ := base64.StdEncoding.DecodeString(str)
  321. bs = append([]byte{ext[extension]}, bs...)
  322. msgid := mu.UUID(8)
  323. Msclient.Call("", msgid, mu.SERVICE_OFFICE_ANALYSIS, mu.SENDTO_TYPE_ALL_RECIVER, bs, 60)
  324. return 1
  325. }))
  326. //下载附件download(url,method,param,head,cookie,fileName)
  327. s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
  328. cookie := S.ToString(-1)
  329. head := S.ToTable(-2)
  330. param := S.ToTable(-3)
  331. method := S.ToString(-4)
  332. url := S.ToString(-5)
  333. fileName := S.ToString(-6)
  334. ishttps := strings.Contains(url, "https")
  335. base64UrlReg := regexp.MustCompile("data:image")
  336. indexArr := base64UrlReg.FindStringIndex(url)
  337. name, size, ftype, fid := "", "", "", ""
  338. //base64 url
  339. if len(indexArr) == 2 { //base64 http://www.mmjyjt.com/data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqAAAAOwCAYAAAD
  340. //截取base64
  341. start := indexArr[0]
  342. url = url[start:]
  343. fileName = "文件下载.jpg"
  344. index := strings.Index(url, ",")
  345. dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(url[index+1:]))
  346. ret, err := io.ReadAll(dec)
  347. if err == nil && len(ret) > 0 {
  348. url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, "", ret)
  349. }
  350. } else { //正常url
  351. var mycookie []*http.Cookie
  352. if cookie != "{}" {
  353. json.Unmarshal([]byte(cookie), &mycookie)
  354. } else {
  355. mycookie = make([]*http.Cookie, 0)
  356. }
  357. fileName = strings.TrimSpace(fileName)
  358. url = strings.TrimSpace(url)
  359. ret := DownloadFile(s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, s.Encoding, false, ishttps, "", s.Timeout)
  360. qu.Debug(GarbledCodeReg.FindAllString(string(ret), -1), len(ret))
  361. if ret == nil || len(ret) < 1024*3 {
  362. qu.Debug("下载文件出错!")
  363. } else {
  364. ftype = qu.GetFileType(ret)
  365. url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, url, ret)
  366. if (ftype == "docx" || ftype == "doc") && len(GarbledCodeReg.FindAllString(string(ret), -1)) > 10 {
  367. name = "附件中含有乱码"
  368. }
  369. }
  370. if strings.TrimSpace(ftype) == "" {
  371. if len(path.Ext(name)) > 0 {
  372. ftype = path.Ext(name)[1:]
  373. }
  374. }
  375. }
  376. S.Push(lua.LString(url))
  377. S.Push(lua.LString(name))
  378. S.Push(lua.LString(size))
  379. S.Push(lua.LString(ftype))
  380. S.Push(lua.LString(fid))
  381. return 5
  382. }))
  383. s.L.SetGlobal("downloadBase64File", s.L.NewFunction(func(S *lua.LState) int {
  384. url := S.ToString(-3)
  385. fileName := S.ToString(-2)
  386. base64Img := S.ToString(-1)
  387. if fileName == "" {
  388. fileName = "文件下载"
  389. }
  390. fileName = fileName + ".jpg"
  391. i := strings.Index(base64Img, ",")
  392. dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(base64Img[i+1:]))
  393. ret, err := io.ReadAll(dec)
  394. name, size, ftype, fid := "", "", "", ""
  395. if err == nil && len(ret) > 0 {
  396. url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, url, ret)
  397. }
  398. S.Push(lua.LString(url))
  399. S.Push(lua.LString(name))
  400. S.Push(lua.LString(size))
  401. S.Push(lua.LString(ftype))
  402. S.Push(lua.LString(fid))
  403. return 5
  404. }))
  405. //支持正则
  406. s.L.SetGlobal("regexp", s.L.NewFunction(func(S *lua.LState) int {
  407. index := int(S.ToNumber(-1))
  408. regstr := S.ToString(-2)
  409. text := S.ToString(-3)
  410. reg := regexp.MustCompile(regstr)
  411. reps := reg.FindAllStringSubmatchIndex(text, -1)
  412. ret := s.L.NewTable()
  413. number := 0
  414. for _, v := range reps {
  415. number++
  416. ret.Insert(number, lua.LString(text[v[index]:v[index+1]]))
  417. }
  418. S.Push(ret)
  419. return 1
  420. }))
  421. //支持替换
  422. s.L.SetGlobal("replace", s.L.NewFunction(func(S *lua.LState) int {
  423. n := S.ToInt(-4)
  424. if n <= 0 {
  425. n = -1
  426. }
  427. text := S.ToString(-3)
  428. old := S.ToString(-2)
  429. repl := S.ToString(-1)
  430. text = strings.Replace(text, old, repl, n)
  431. S.Push(lua.LString(text))
  432. return 1
  433. }))
  434. //标题的关键词、排除词过滤
  435. s.L.SetGlobal("pagefilterword", s.L.NewFunction(func(S *lua.LState) int {
  436. keyWordReg := regexp.MustCompile(sp.Config.Word["keyword"])
  437. notKeyWordReg := regexp.MustCompile(sp.Config.Word["notkeyword"])
  438. data := S.ToTable(-1)
  439. dataMap := sp.TableToMap(data)
  440. ret := s.L.NewTable()
  441. num := 1
  442. for _, v := range dataMap {
  443. tmp := v.(map[string]interface{})
  444. isOk := false
  445. if title := qu.ObjToString(tmp["title"]); title != "" {
  446. if keyWordReg.MatchString(title) && !notKeyWordReg.MatchString(title) {
  447. isOk = true
  448. }
  449. }
  450. if isOk {
  451. ret.Insert(num, sp.MapToLuaTable(S, tmp))
  452. num++
  453. }
  454. }
  455. S.Push(ret)
  456. return 1
  457. }))
  458. //标题的关键词、排除词过滤
  459. s.L.SetGlobal("detailfilterword", s.L.NewFunction(func(S *lua.LState) int {
  460. keyWordReg := regexp.MustCompile(sp.Config.Word["keyword"])
  461. notKeyWordReg := regexp.MustCompile(sp.Config.Word["notkeyword"])
  462. data := S.ToTable(-1)
  463. dataMap := sp.TableToMap(data)
  464. if title := qu.ObjToString(dataMap["title"]); title != "" {
  465. if keyWordReg.MatchString(title) && !notKeyWordReg.MatchString(title) {
  466. S.Push(lua.LBool(true))
  467. return 1
  468. } else {
  469. qu.Debug(s.SCode, dataMap["href"], " title error")
  470. }
  471. } else {
  472. qu.Debug(s.SCode, dataMap["href"], " title error")
  473. }
  474. S.Push(lua.LBool(false))
  475. return 1
  476. }))
  477. //detail过滤
  478. s.L.SetGlobal("filterdetail", s.L.NewFunction(func(S *lua.LState) int {
  479. /*
  480. 1.长度判断 (特殊处理:详情请访问原网页!;详见原网页;见原网页;无;无相关内容;无正文内容)
  481. 2.是否含汉字
  482. */
  483. reg1 := regexp.MustCompile("(原网页|无|无相关内容|无正文内容|详见附件|见附件)")
  484. reg2 := regexp.MustCompile("[\u4e00-\u9fa5]")
  485. detail := S.ToString(-1)
  486. if reg1.MatchString(detail) {
  487. S.Push(lua.LBool(true))
  488. return 1
  489. }
  490. if len([]rune(detail)) < 50 || !reg2.MatchString(detail) {
  491. S.Push(lua.LBool(false))
  492. return 1
  493. }
  494. S.Push(lua.LBool(false))
  495. return 1
  496. }))
  497. //匹配汉字
  498. s.L.SetGlobal("matchan", s.L.NewFunction(func(S *lua.LState) int {
  499. reg1 := regexp.MustCompile("(见附件|详见附件)")
  500. reg2 := regexp.MustCompile("[\u4e00-\u9fa5]")
  501. detail := S.ToString(-1)
  502. detail = reg1.ReplaceAllString(detail, "")
  503. ok := reg2.MatchString(detail)
  504. S.Push(lua.LBool(ok))
  505. return 1
  506. }))
  507. //base64加密
  508. s.L.SetGlobal("encodeBase64", s.L.NewFunction(func(S *lua.LState) int {
  509. text := S.ToString(-1)
  510. base64Text := base64.StdEncoding.EncodeToString([]byte(text))
  511. S.Push(lua.LString(base64Text))
  512. return 1
  513. }))
  514. //base64解密
  515. s.L.SetGlobal("decodeBase64", s.L.NewFunction(func(S *lua.LState) int {
  516. text := S.ToString(-1)
  517. result := ""
  518. byteText, err := base64.StdEncoding.DecodeString(text)
  519. if err == nil {
  520. result = string(byteText)
  521. }
  522. S.Push(lua.LString(result))
  523. return 1
  524. }))
  525. //GB2312字符集解码
  526. s.L.SetGlobal("decodeGB2312", s.L.NewFunction(func(S *lua.LState) int {
  527. text := S.ToString(-1)
  528. result := ""
  529. decodedString, _, err := transform.String(simplifiedchinese.GB18030.NewDecoder(), text)
  530. if err == nil {
  531. result = decodedString
  532. }
  533. S.Push(lua.LString(result))
  534. return 1
  535. }))
  536. //aes cbc模式加密
  537. s.L.SetGlobal("aesEncryptCBC", s.L.NewFunction(func(S *lua.LState) int {
  538. origData := S.ToString(-3)
  539. key := S.ToString(-2)
  540. iv := S.ToString(-1)
  541. bytekey := []byte(key)
  542. byteorigData := []byte(origData)
  543. byteiv := []byte(iv)
  544. encrypted := sp.AesCBCEncrypt(byteorigData, bytekey, byteiv)
  545. // 将加密后的数据和初始向量进行Base64编码
  546. result := base64.StdEncoding.EncodeToString(encrypted)
  547. S.Push(lua.LString(result))
  548. return 1
  549. }))
  550. //aes cbc模式解密
  551. s.L.SetGlobal("aesDecryptCBC", s.L.NewFunction(func(S *lua.LState) int {
  552. origData := S.ToString(-3)
  553. key := S.ToString(-2)
  554. iv := S.ToString(-1)
  555. bytekey := []byte(key)
  556. byteiv := []byte(iv)
  557. data, _ := base64.StdEncoding.DecodeString(origData)
  558. result := sp.AesCBCDecrypter(data, bytekey, byteiv)
  559. S.Push(lua.LString(result))
  560. return 1
  561. }))
  562. //aes ecb模式加密
  563. s.L.SetGlobal("aesEncryptECB", s.L.NewFunction(func(S *lua.LState) int {
  564. origData := S.ToString(-2)
  565. key := S.ToString(-1)
  566. bytekey := []byte(key)
  567. byteorigData := []byte(origData)
  568. encrypted := sp.AesECBEncrypt(byteorigData, bytekey)
  569. result := base64.StdEncoding.EncodeToString(encrypted)
  570. S.Push(lua.LString(result))
  571. return 1
  572. }))
  573. //aes ecb模式解密
  574. s.L.SetGlobal("aesDecryptECB", s.L.NewFunction(func(S *lua.LState) int {
  575. origData := S.ToString(-2)
  576. key := S.ToString(-1)
  577. data, _ := base64.StdEncoding.DecodeString(origData)
  578. result := sp.AesECBDecrypter(data, []byte(key))
  579. S.Push(lua.LString(result))
  580. return 1
  581. }))
  582. //des ecb模式加密
  583. s.L.SetGlobal("desEncryptECB", s.L.NewFunction(func(S *lua.LState) int {
  584. origData := S.ToString(-2)
  585. key := S.ToString(-1)
  586. encrypted := sp.DesECBEncrypt([]byte(origData), []byte(key))
  587. result := base64.StdEncoding.EncodeToString(encrypted)
  588. S.Push(lua.LString(result))
  589. return 1
  590. }))
  591. //des ecb模式解密
  592. s.L.SetGlobal("desDecryptECB", s.L.NewFunction(func(S *lua.LState) int {
  593. origData := S.ToString(-2)
  594. key := S.ToString(-1)
  595. data, _ := base64.StdEncoding.DecodeString(origData)
  596. result := sp.DesECBDecrypter(data, []byte(key))
  597. S.Push(lua.LString(result))
  598. return 1
  599. }))
  600. //des cbc模式加密
  601. s.L.SetGlobal("desEncryptCBC", s.L.NewFunction(func(S *lua.LState) int {
  602. origData := S.ToString(-3)
  603. key := S.ToString(-2)
  604. iv := S.ToString(-1)
  605. bytekey := []byte(key)
  606. byteorigData := []byte(origData)
  607. byteiv := []byte(iv)
  608. encrypted := sp.DesCBCEncrypt(byteorigData, bytekey, byteiv)
  609. result := base64.StdEncoding.EncodeToString(encrypted)
  610. S.Push(lua.LString(result))
  611. return 1
  612. }))
  613. //des cbc模式解密
  614. s.L.SetGlobal("desDecryptCBC", s.L.NewFunction(func(S *lua.LState) int {
  615. origData := S.ToString(-3)
  616. key := S.ToString(-2)
  617. iv := S.ToString(-1)
  618. bytekey := []byte(key)
  619. byteiv := []byte(iv)
  620. data, _ := base64.StdEncoding.DecodeString(origData)
  621. result := sp.DesCBCDecrypter(data, bytekey, byteiv)
  622. S.Push(lua.LString(result))
  623. return 1
  624. }))
  625. //rsa 公钥加密
  626. s.L.SetGlobal("rsaEncrypt", s.L.NewFunction(func(S *lua.LState) int {
  627. origData := S.ToString(-2)
  628. key := S.ToString(-1)
  629. encrypted := sp.EncryptWithPublicKey([]byte(origData), []byte(key))
  630. result := base64.StdEncoding.EncodeToString(encrypted)
  631. S.Push(lua.LString(result))
  632. return 1
  633. }))
  634. //rsa 私钥解密
  635. s.L.SetGlobal("rsaDecrypt", s.L.NewFunction(func(S *lua.LState) int {
  636. origData := S.ToString(-2)
  637. key := S.ToString(-1)
  638. data, _ := base64.StdEncoding.DecodeString(origData)
  639. result := sp.DecryptWithPrivateKey(data, []byte(key))
  640. S.Push(lua.LString(result))
  641. return 1
  642. }))
  643. //根据正文获取发布时间
  644. s.L.SetGlobal("getPublishtime", s.L.NewFunction(func(S *lua.LState) int {
  645. detail := S.ToString(-2)
  646. contenthtml := S.ToString(-1)
  647. publishtime := sp.GetPublishtime([]string{contenthtml, detail})
  648. S.Push(lua.LString(publishtime))
  649. return 1
  650. }))
  651. //匹配
  652. s.L.SetGlobal("stringFind", s.L.NewFunction(func(S *lua.LState) int {
  653. regstr := S.ToString(-1)
  654. text := S.ToString(-2)
  655. reg := regexp.MustCompile(regstr)
  656. result := reg.FindString(text)
  657. isMatch := false
  658. if result != "" {
  659. isMatch = true
  660. }
  661. S.Push(lua.LString(result))
  662. S.Push(lua.LBool(isMatch))
  663. return 2
  664. }))
  665. //截取
  666. s.L.SetGlobal("stringSub", s.L.NewFunction(func(S *lua.LState) int {
  667. text := S.ToString(-3)
  668. start := S.ToInt(-2)
  669. end := S.ToInt(-1)
  670. result := ""
  671. if len(text) > 0 {
  672. textRune := []rune(text)
  673. textLen := len(textRune)
  674. if end < 0 {
  675. if start > 0 { //正向截取到倒数第end位
  676. result = string(textRune[start-1 : textLen+1+end])
  677. } else if start < 0 { //反向截取 从倒数第start位截取到倒数第end位
  678. result = string(textRune[textLen+start : textLen+1+end])
  679. }
  680. } else if start > 0 && end >= start && end <= textLen { //从第start个截取到第end个
  681. result = string(textRune[start-1 : end])
  682. }
  683. // if end == -1 {
  684. // if start >= 1 { //正向截取到结尾
  685. // result = string(textRune[start-1:])
  686. // } else if start < 0 && textLen+start >= 0 { //反向截取后缀
  687. // result = string(textRune[textLen+start:])
  688. // }
  689. // } else if start >= 1 && end <= textLen { //从第start个截取到第end个
  690. // result = string(textRune[start-1 : end])
  691. // }
  692. }
  693. S.Push(lua.LString(result))
  694. return 1
  695. }))
  696. //长度
  697. s.L.SetGlobal("stringLen", s.L.NewFunction(func(S *lua.LState) int {
  698. text := S.ToString(-1)
  699. textLen := len([]rune(text))
  700. S.Push(lua.LNumber(textLen))
  701. return 1
  702. }))
  703. //去除特殊标签中间内容
  704. s.L.SetGlobal("getPureContent", s.L.NewFunction(func(S *lua.LState) int {
  705. con := S.ToString(-1)
  706. reg := regexp.MustCompile("(?s)<(!%-%-|!--|style).*?(%-%-|--|style)>") //注释 css
  707. con = reg.ReplaceAllString(con, "")
  708. // indexArr := reg.FindAllStringIndex(con, -1)
  709. // for i := len(indexArr) - 1; i >= 0; i-- {
  710. // if index := indexArr[i]; len(index) == 2 {
  711. // con = con[:index[0]] + con[index[1]:]
  712. // }
  713. // }
  714. S.Push(lua.LString(con))
  715. return 1
  716. }))
  717. //interface转string
  718. s.L.SetGlobal("formatToString", s.L.NewFunction(func(S *lua.LState) int {
  719. strNum := S.ToString(-1)
  720. decimalNum, _ := decimal.NewFromString(strNum)
  721. S.Push(lua.LString(decimalNum.String()))
  722. return 1
  723. }))
  724. //获取验证码
  725. s.L.SetGlobal("getCodeByPath", s.L.NewFunction(func(S *lua.LState) int {
  726. cookie := S.ToString(-1)
  727. head := S.ToTable(-2)
  728. stype := S.ToString(-3)
  729. path := S.ToString(-4)
  730. proxy := S.ToBool(-5)
  731. headMap := sp.GetTable(head)
  732. //qu.Debug("cookie----------", cookie)
  733. //qu.Debug("headMap----------", headMap)
  734. headJsonStr := ""
  735. headByte, err := json.Marshal(headMap)
  736. if err == nil {
  737. headJsonStr = string(headByte)
  738. }
  739. code, respHead, respCookie := codegrpc.GetCodeByPath(path, stype, headJsonStr, cookie, proxy)
  740. qu.Debug("GetCodeByPath code====", code)
  741. //qu.Debug("respHead====", respHead)
  742. //qu.Debug("respCookie====", respCookie)
  743. S.Push(lua.LString(code))
  744. respHeadMap := map[string]interface{}{}
  745. json.Unmarshal([]byte(respHead), &respHeadMap)
  746. hTable := sp.MapToLuaTable(S, respHeadMap)
  747. S.Push(hTable)
  748. S.Push(lua.LString(respCookie))
  749. return 3
  750. }))
  751. s.L.SetGlobal("goRunJs", s.L.NewFunction(func(S *lua.LState) int {
  752. param := S.ToString(-2) //list or detail
  753. step := S.ToString(-1) //参数
  754. result := gojs.GoRunJsGetResult(s.SCode, param, step)
  755. qu.Debug("Go Run Js Result:", param, step, result)
  756. S.Push(lua.LString(result))
  757. return 1
  758. }))
  759. s.L.SetGlobal("newDownloadFile", s.L.NewFunction(func(S *lua.LState) int {
  760. cookie := S.ToString(-1)
  761. head := S.ToTable(-2)
  762. param := S.ToTable(-3)
  763. method := S.ToString(-4)
  764. url := S.ToString(-5)
  765. fileName := S.ToString(-6)
  766. ishttps := strings.Contains(url, "https")
  767. var mycookie []*http.Cookie
  768. if cookie != "{}" {
  769. json.Unmarshal([]byte(cookie), &mycookie)
  770. } else {
  771. mycookie = make([]*http.Cookie, 0)
  772. }
  773. fileName = strings.TrimSpace(fileName)
  774. url = strings.TrimSpace(url)
  775. ret := NewDownloadFile(s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, s.Encoding, false, ishttps, "", s.Timeout, false)
  776. name, size, ftype, fid := "", "", "", ""
  777. qu.Debug(GarbledCodeReg.FindAllString(string(ret), -1), len(ret))
  778. if ret == nil || len(ret) < 1024*3 {
  779. qu.Debug("下载文件出错!")
  780. } else {
  781. ftype = qu.GetFileType(ret)
  782. if (ftype == "docx" || ftype == "doc") && len(GarbledCodeReg.FindAllString(string(ret), -1)) > 10 {
  783. url, name, size, ftype, fid = "附件中含有乱码", "附件中含有乱码", "", "", ""
  784. } else {
  785. url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, url, ret)
  786. }
  787. }
  788. if strings.TrimSpace(ftype) == "" {
  789. if len(path.Ext(name)) > 0 {
  790. ftype = path.Ext(name)[1:]
  791. }
  792. }
  793. S.Push(lua.LString(url))
  794. S.Push(lua.LString(name))
  795. S.Push(lua.LString(size))
  796. S.Push(lua.LString(ftype))
  797. S.Push(lua.LString(fid))
  798. return 5
  799. }))
  800. //渲染页面下载
  801. s.L.SetGlobal("downloadByRender", s.L.NewFunction(func(S *lua.LState) int {
  802. href := S.ToString(-1)
  803. contentHtml := sp.DownloadByRender(href)
  804. S.Push(lua.LString(contentHtml))
  805. return 1
  806. }))
  807. //渲染页面下载
  808. s.L.SetGlobal("getSimulateLoginInfo", s.L.NewFunction(func(S *lua.LState) int {
  809. param := S.ToTable(-1)
  810. header := S.ToTable(-2)
  811. byteParam, _ := json.Marshal(sp.TableToMap(param))
  812. headerParam, _ := json.Marshal(sp.TableToMap(header))
  813. stype := S.ToString(-3) //login:登陆,get:获取cookie
  814. qu.Debug(string(headerParam))
  815. qu.Debug(string(byteParam))
  816. cookie := login.GetSimulateLoginInfo(*site, stype, string(headerParam), string(byteParam))
  817. S.Push(lua.LString(cookie))
  818. return 1
  819. }))
  820. //辽宁省招标投标监管网
  821. s.L.SetGlobal("multipartRequest", s.L.NewFunction(func(S *lua.LState) int {
  822. yzm := S.ToString(-5)
  823. verifyId := S.ToString(-4)
  824. id := S.ToString(-3)
  825. param := S.ToTable(-2)
  826. head := S.ToTable(-1)
  827. paramMap := sp.GetTable(param)
  828. headMap := sp.GetTable(head)
  829. qu.Debug(paramMap)
  830. qu.Debug(headMap)
  831. paramMap = map[string]interface{}{
  832. "baseUrl": "https://www.lntb.gov.cn/mhback",
  833. "realRequestUri": "/api/cTenderNoticeController/getDetail/" + id,
  834. "verifyId": verifyId,
  835. "result": yzm,
  836. }
  837. headMap = map[string]interface{}{
  838. "Referer": "",
  839. "sec-ch-ua-mobile": "?0",
  840. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
  841. }
  842. //由于验证码识别不准确,加减法呢特殊处理
  843. for _, w := range []string{"+", "-"} {
  844. if arr := strings.Split(yzm, w); len(arr) == 2 {
  845. if w == "+" {
  846. yzm = fmt.Sprint(qu.IntAll(arr[0]) + qu.IntAll(arr[1]))
  847. } else {
  848. yzm = fmt.Sprint(qu.IntAll(arr[0]) - qu.IntAll(arr[1]))
  849. }
  850. }
  851. }
  852. qu.Debug(yzm, verifyId, id)
  853. payload := &bytes.Buffer{}
  854. writer := multipart.NewWriter(payload)
  855. //param
  856. for k, v := range paramMap {
  857. _ = writer.WriteField(k, v.(string))
  858. }
  859. err := writer.Close()
  860. if err != nil {
  861. qu.Debug(err)
  862. return 1
  863. }
  864. //request
  865. client := &http.Client{}
  866. req, err := http.NewRequest("POST", "https://www.lntb.gov.cn/mhback/kk-anti-reptile/validate", payload)
  867. //header
  868. for k, v := range headMap {
  869. req.Header.Add(k, v.(string))
  870. }
  871. req.Header.Set("Content-Type", writer.FormDataContentType())
  872. res, err := client.Do(req)
  873. if err != nil {
  874. qu.Debug(err)
  875. return 1
  876. }
  877. defer res.Body.Close()
  878. body, err := ioutil.ReadAll(res.Body)
  879. if err != nil {
  880. fmt.Println(err)
  881. return 1
  882. }
  883. result := false
  884. resultMap := map[string]bool{}
  885. if json.Unmarshal(body, &resultMap) == nil {
  886. result = resultMap["result"]
  887. }
  888. fmt.Println(string(body), resultMap)
  889. S.Push(lua.LBool(result))
  890. return 1
  891. }))
  892. //chromedp下载
  893. s.L.SetGlobal("downloadByChrome", s.L.NewFunction(func(S *lua.LState) int {
  894. timeout := S.ToInt64(-2)
  895. taskStr := S.ToString(-1)
  896. cam := sp.ChromeActionMap{}
  897. if json.Unmarshal([]byte(taskStr), &cam) == nil {
  898. if len(cam.BaseActions) > 0 {
  899. if len(cam.RangeActions) > 0 && cam.RangeTimes > 0 {
  900. for times := 1; times <= cam.RangeTimes; times++ {
  901. cam.BaseActions = append(cam.BaseActions, cam.RangeActions...)
  902. }
  903. }
  904. chromeTask := sp.ChromeTask{
  905. TimeOut: timeout,
  906. Actions: cam.BaseActions,
  907. }
  908. ret := DownloadByChrome(downloadnode, s.Downloader, chromeTask, s.Timeout)
  909. S.Push(sp.MapToTable(S, ret))
  910. } else {
  911. S.Push(lua.LString("未设置基础循环动作"))
  912. }
  913. } else {
  914. S.Push(lua.LString("chrome task json 格式化错误"))
  915. }
  916. return 1
  917. }))
  918. //针对中国招标投标公共服务平台三级页瑞数加密下载方法
  919. s.L.SetGlobal("downloadByDataIntercept", s.L.NewFunction(func(S *lua.LState) int {
  920. url := S.ToString(-4)
  921. url_regex := S.ToString(-3)
  922. timeout := S.ToInt(-2)
  923. proxy := S.ToBool(-1)
  924. headers := sp.DownloadByDataIntercept(url, url_regex, timeout, proxy)
  925. table := sp.MapToLuaTable(S, headers)
  926. S.Push(table)
  927. return 1
  928. }))
  929. }
  930. func (s *Script) Reload() {
  931. s.L.Close()
  932. site := ""
  933. s.LoadScript(&site, "", s.ScriptFile)
  934. }
  // transUnic decodes \uXXXX escape sequences (four hex digits) in str into
  // the characters they denote and returns the resulting string.
  //
  //   - A high/low UTF-16 surrogate pair written as two consecutive escapes
  //     (for example \ud83d\ude00) is combined into a single code point.
  //   - An escape whose four digits are not valid hex is copied through
  //     unchanged.
  //   - A lone surrogate is written as U+FFFD, the replacement character.
  //   - A trailing fragment shorter than six bytes is copied verbatim.
  func transUnic(str string) string {
  	const escLen = 6 // length of `\uXXXX`

  	var out strings.Builder
  	out.Grow(len(str))
  	n := len(str)
  	for i := 0; i < n; {
  		end := i + escLen
  		if end > n {
  			// Not enough bytes left for a full escape.
  			out.WriteString(str[i:])
  			break
  		}
  		if str[i] != '\\' || str[i+1] != 'u' {
  			out.WriteByte(str[i])
  			i++
  			continue
  		}
  		unit, err := strconv.ParseUint(str[i+2:end], 16, 16)
  		if err != nil {
  			out.WriteString(str[i:end])
  			i = end
  			continue
  		}
  		// A high surrogate immediately followed by an escaped low surrogate
  		// encodes one code point outside the Basic Multilingual Plane.
  		if unit >= 0xD800 && unit <= 0xDBFF && end+escLen <= n && str[end] == '\\' && str[end+1] == 'u' {
  			low, lowErr := strconv.ParseUint(str[end+2:end+escLen], 16, 16)
  			if lowErr == nil && low >= 0xDC00 && low <= 0xDFFF {
  				out.WriteRune(rune(0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00)))
  				i = end + escLen
  				continue
  			}
  		}
  		out.WriteRune(rune(unit))
  		i = end
  	}
  	return out.String()
  }
  961. // 取得变量
  962. func (s *Script) GetVar(key string) string {
  963. return s.L.GetGlobal(key).String()
  964. }
  965. func (s *Script) GetIntVar(key string) int {
  966. lv := s.L.GetGlobal(key)
  967. if v, ok := lv.(lua.LNumber); ok {
  968. return int(v)
  969. }
  970. return -1
  971. }
  972. func (s *Script) GetBoolVar(key string) bool {
  973. lv := s.L.GetGlobal(key)
  974. if v, ok := lv.(lua.LBool); ok {
  975. return bool(v)
  976. }
  977. return false
  978. }