/*
 * Script loading and invocation wrapper.
 *
 * In the early phase scripts are loaded from the file system;
 * later they are to be loaded from database configuration.
 * Methods shared between Lua scripts should be factored out, with the main
 * script file loading a common Lua file.
 */
- package spider
- import (
- codegrpc "analysiscode/client"
- "bytes"
- "compress/gzip"
- "encoding/base64"
- "encoding/json"
- "fmt"
- "github.com/shopspring/decimal"
- gojs "gorunjs/client"
- "io"
- "io/ioutil"
- login "login/client"
- mu "mfw/util"
- "mime/multipart"
- "net/http"
- "net/url"
- "path"
- qu "qfw/util"
- "regexp"
- sp "spiderutil"
- "strconv"
- "strings"
- "time"
- "golang.org/x/text/encoding/simplifiedchinese"
- "golang.org/x/text/transform"
- "github.com/cjoudrey/gluahttp"
- lujson "github.com/yuin/gopher-json"
- "github.com/yuin/gopher-lua"
- )
- // 脚本
- type Script struct {
- SCode, ScriptFile string
- Encoding string
- Downloader string //下载器
- Timeout int64 //超时时间秒
- L *lua.LState
- Test_luareqcount int //脚本请求次数
- Test_goreqtime int //go发起次数(时间)
- Test_goreqlist int //go发起次数(列表)
- Test_goreqcon int //go发起次数(正文)
- }
- // 加载文件
- func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ...string) {
- s.ScriptFile = script
- options := lua.Options{
- RegistrySize: 256 * 20,
- CallStackSize: 256,
- IncludeGoStackTrace: false,
- }
- s.L = lua.NewState(options)
- //s.L.ScriptFileName = s.SCode
- s.L.PreloadModule("http", gluahttp.NewHttpModule(&http.Client{}).Loader)
- s.L.PreloadModule("json", lujson.Loader)
- if len(isfile) > 0 {
- if err := s.L.DoFile(script); err != nil {
- panic("加载lua脚本错误" + err.Error())
- }
- } else {
- if err := s.L.DoString(script); err != nil {
- panic("加载lua脚本错误" + err.Error())
- }
- }
- s.Encoding = s.GetVar("spiderPageEncoding")
- //暴露go方法
- //download(url,head) 普通下载
- s.L.SetGlobal("download", s.L.NewFunction(func(S *lua.LState) int {
- head := S.ToTable(-1)
- url := S.ToString(-2)
- ishttps := S.ToBool(-3)
- charset := S.ToString(-4)
- if charset == "" {
- charset = s.Encoding
- }
- ret := Download(downloadnode, s.Downloader, url, "get", sp.GetTable(head), charset, false, ishttps, "", s.Timeout)
- S.Push(lua.LString(ret))
- s.Test_luareqcount++
- return 1
- }))
- s.L.SetGlobal("findContentText", s.L.NewFunction(func(S *lua.LState) int {
- gpath := S.ToString(-2)
- content := S.ToString(-1)
- ret := sp.FindContentText(gpath, content)
- S.Push(ret)
- return 1
- }))
- //高级下载download(url,method,param,head,cookie)
- s.L.SetGlobal("downloadAdv", s.L.NewFunction(func(S *lua.LState) int {
- cookie := S.ToString(-1)
- head := S.ToTable(-2)
- param := S.ToTable(-3)
- method := S.ToString(-4)
- url := S.ToString(-5)
- ishttps := S.ToBool(-6)
- charset := S.ToString(-7)
- if charset == "" {
- charset = s.Encoding
- }
- var mycookie []*http.Cookie
- json.Unmarshal([]byte(cookie), &mycookie)
- var ret string
- var retcookie []*http.Cookie
- var headers = map[string]interface{}{}
- if param == nil {
- ptext := map[string]interface{}{"text": S.ToString(-3)}
- ret, retcookie, headers = DownloadAdv(downloadnode, s.Downloader, url, method, ptext, sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
- } else {
- ret, retcookie, headers = DownloadAdv(downloadnode, s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
- }
- S.Push(lua.LString(ret))
- scookie, _ := json.Marshal(retcookie)
- S.Push(lua.LString(scookie))
- hTable := sp.MapToLuaTable(S, headers)
- S.Push(hTable)
- s.Test_luareqcount++
- return 3
- }))
- s.L.SetGlobal("downloadAdvNew", s.L.NewFunction(func(S *lua.LState) int {
- cookie := S.ToString(-1)
- head := S.ToTable(-2)
- param := S.ToTable(-3)
- method := S.ToString(-4)
- url := S.ToString(-5)
- ishttps := S.ToBool(-6)
- charset := S.ToString(-7)
- if charset == "" {
- charset = s.Encoding
- }
- var mycookie []*http.Cookie
- json.Unmarshal([]byte(cookie), &mycookie)
- var ret string
- var retcookie []*http.Cookie
- var headers = map[string]interface{}{}
- if param == nil {
- ptext := map[string]interface{}{"text": S.ToString(-3)}
- ret, retcookie, headers = DownloadAdvNew(downloadnode, s.Downloader, url, method, ptext, sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
- } else {
- ret, retcookie, headers = DownloadAdvNew(downloadnode, s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, charset, false, ishttps, "", s.Timeout)
- }
- S.Push(lua.LString(ret))
- scookie, _ := json.Marshal(retcookie)
- S.Push(lua.LString(scookie))
- hTable := sp.MapToLuaTable(S, headers)
- S.Push(hTable)
- s.Test_luareqcount++
- return 3
- }))
- s.L.SetGlobal("findOneText", s.L.NewFunction(func(S *lua.LState) int {
- nodetype := S.ToString(-3)
- gpath := S.ToString(-2)
- content := S.ToString(-1)
- ret := sp.FindOneText(gpath, content, nodetype)
- S.Push(ret)
- return 1
- }))
- s.L.SetGlobal("findOneHtml", s.L.NewFunction(func(S *lua.LState) int {
- nodetype := S.ToString(-3)
- gpath := S.ToString(-2)
- content := S.ToString(-1)
- ret := sp.FindOneHtml(gpath, content, nodetype)
- S.Push(ret)
- return 1
- }))
- s.L.SetGlobal("findListText", s.L.NewFunction(func(S *lua.LState) int {
- gpath := S.ToString(-2)
- content := S.ToString(-1)
- ret := s.L.NewTable()
- sp.FindListText(gpath, content, ret)
- S.Push(ret)
- return 1
- }))
- s.L.SetGlobal("findListHtml", s.L.NewFunction(func(S *lua.LState) int {
- gpath := S.ToString(-2)
- content := S.ToString(-1)
- ret := s.L.NewTable()
- sp.FindListHtml(gpath, content, ret)
- S.Push(ret)
- return 1
- }))
- //推送列表页下载数据量
- s.L.SetGlobal("sendListNum", s.L.NewFunction(func(S *lua.LState) int {
- pageno := S.ToInt(-3) //当前页
- index := S.ToInt(-2) //第几条数据
- table := S.ToTable(-1)
- qu.Debug(s.SCode, index, table.Len())
- if index == 1 {
- if pageno == 1 { //第一页数据
- oneMap := sp.TableToMap(table)
- text, _ := json.Marshal(oneMap)
- hashText := sp.HexTextByte(text)
- qu.Debug("第一页:", hashText)
- } else if pageno == 2 { //第一页数据
- twoMap := sp.TableToMap(table)
- text, _ := json.Marshal(twoMap)
- hashText := sp.HexTextByte(text)
- qu.Debug("第二页:", hashText)
- }
- }
- return 1
- }))
- s.L.SetGlobal("findMap", s.L.NewFunction(func(S *lua.LState) int {
- qmap := S.ToTable(-2)
- content := S.ToString(-1)
- ret := s.L.NewTable()
- sp.FindMap(qmap, content, ret)
- S.Push(ret)
- return 1
- }))
- //调用jsvm
- s.L.SetGlobal("jsvm", s.L.NewFunction(func(S *lua.LState) int {
- js := S.ToString(-1)
- ret := s.L.NewTable()
- if js == "" {
- ret.RawSet(lua.LString("val"), lua.LString(""))
- ret.RawSet(lua.LString("err"), lua.LString("js is null"))
- } else {
- rep := sp.JsVmPost(sp.Config.JsVmUrl, js)
- ret.RawSet(lua.LString("val"), lua.LString(qu.ObjToString(rep["val"])))
- ret.RawSet(lua.LString("err"), lua.LString(qu.ObjToString(rep["err"])))
- }
- S.Push(ret)
- return 1
- }))
- //指定下载器
- s.L.SetGlobal("changeDownloader", s.L.NewFunction(func(S *lua.LState) int {
- s.Downloader = GetOneDownloader()
- S.Push(lua.LString(s.Downloader))
- return 1
- }))
- //手工延时
- s.L.SetGlobal("timeSleep", s.L.NewFunction(func(S *lua.LState) int {
- time.Sleep(1 * time.Second)
- return 0
- }))
- s.L.SetGlobal("runSleep", s.L.NewFunction(func(S *lua.LState) int {
- t := S.ToInt(-1)
- if t <= 0 {
- t = 1
- }
- time.Sleep(time.Duration(t) * time.Second)
- return 0
- }))
- //编码解码
- s.L.SetGlobal("transCode", s.L.NewFunction(func(S *lua.LState) int {
- codeType := strings.ToLower(S.ToString(-2))
- str := S.CheckString(-1)
- switch codeType {
- case "unicode":
- str = strings.Replace(str, "%u", "\\u", -1)
- str = transUnic(str)
- case "urlencode_gbk":
- data, _ := ioutil.ReadAll(transform.NewReader(bytes.NewReader([]byte(str)), simplifiedchinese.GBK.NewEncoder()))
- l, _ := url.Parse("http://a.com/?" + string(data))
- tmpstr := l.Query().Encode()
- if len(tmpstr) > 1 {
- str = tmpstr[0 : len(tmpstr)-1]
- } else {
- str = ""
- }
- case "urlencode_utf8":
- l, _ := url.Parse("http://a.com/?" + str)
- tmpstr := l.Query().Encode()
- if len(tmpstr) > 1 {
- str = tmpstr[0 : len(tmpstr)-1]
- } else {
- str = ""
- }
- case "urldecode_utf8":
- str, _ = url.QueryUnescape(str)
- case "decode64":
- str = sp.DecodeB64(str)
- case "encodemd5":
- str = qu.GetMd5String(str)
- case "htmldecode": //html实体码
- //txt := `<div align="left" style="margin-left: 0pt;"><span style='font-family:; font-size:13px; color:#000000'>太阳岛特勤消防站、松浦特勤消防站建设项目设计中标公示</span></div>`
- str = S.ToString(-1)
- reg, _ := regexp.Compile("&#\\d+;")
- str = reg.ReplaceAllStringFunc(str, func(src string) string {
- v, _ := strconv.Atoi(src[2 : len(src)-1])
- return string(rune(v))
- })
- }
- S.Push(lua.LString(str))
- return 1
- }))
- //保存错误日志
- s.L.SetGlobal("saveErrLog", s.L.NewFunction(func(S *lua.LState) int {
- return 0
- }))
- //添加改版日志
- s.L.SetGlobal("saveRevisionLog", s.L.NewFunction(func(S *lua.LState) int {
- return 0
- }))
- //如果服务端返回的html是gzip压缩过格式的 这里需要转一下
- s.L.SetGlobal("unGzip", s.L.NewFunction(func(S *lua.LState) int {
- html := S.ToString(-1)
- bs := []byte(html)
- gzipreader, _ := gzip.NewReader(bytes.NewReader(bs))
- bs, _ = ioutil.ReadAll(gzipreader)
- S.Push(lua.LString(bs))
- return 1
- }))
- s.L.SetGlobal("titleRepeatJudgement", s.L.NewFunction(func(S *lua.LState) int {
- bResult := false
- S.Push(lua.LBool(bResult))
- return 1
- }))
- //解析附件中的word、pdf
- s.L.SetGlobal("officeAnalysis", s.L.NewFunction(func(S *lua.LState) int {
- ext := map[string]byte{"pdf": byte(0), "doc": byte(1), "docx": byte(2)}
- str := S.ToString(-2)
- extension := S.ToString(-1)
- bs, _ := base64.StdEncoding.DecodeString(str)
- bs = append([]byte{ext[extension]}, bs...)
- msgid := mu.UUID(8)
- Msclient.Call("", msgid, mu.SERVICE_OFFICE_ANALYSIS, mu.SENDTO_TYPE_ALL_RECIVER, bs, 60)
- return 1
- }))
- //下载附件download(url,method,param,head,cookie,fileName)
- s.L.SetGlobal("downloadFile", s.L.NewFunction(func(S *lua.LState) int {
- cookie := S.ToString(-1)
- head := S.ToTable(-2)
- param := S.ToTable(-3)
- method := S.ToString(-4)
- url := S.ToString(-5)
- fileName := S.ToString(-6)
- ishttps := strings.Contains(url, "https")
- base64UrlReg := regexp.MustCompile("data:image")
- indexArr := base64UrlReg.FindStringIndex(url)
- name, size, ftype, fid := "", "", "", ""
- //base64 url
- if len(indexArr) == 2 { //base64 http://www.mmjyjt.com/data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAqAAAAOwCAYAAAD
- //截取base64
- start := indexArr[0]
- url = url[start:]
- fileName = "文件下载.jpg"
- index := strings.Index(url, ",")
- dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(url[index+1:]))
- ret, err := io.ReadAll(dec)
- if err == nil && len(ret) > 0 {
- url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, "", ret)
- }
- } else { //正常url
- var mycookie []*http.Cookie
- if cookie != "{}" {
- json.Unmarshal([]byte(cookie), &mycookie)
- } else {
- mycookie = make([]*http.Cookie, 0)
- }
- fileName = strings.TrimSpace(fileName)
- url = strings.TrimSpace(url)
- ret := DownloadFile(s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, s.Encoding, false, ishttps, "", s.Timeout)
- qu.Debug(GarbledCodeReg.FindAllString(string(ret), -1), len(ret))
- if ret == nil || len(ret) < 1024*3 {
- qu.Debug("下载文件出错!")
- } else {
- ftype = qu.GetFileType(ret)
- url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, url, ret)
- if (ftype == "docx" || ftype == "doc") && len(GarbledCodeReg.FindAllString(string(ret), -1)) > 10 {
- name = "附件中含有乱码"
- }
- }
- if strings.TrimSpace(ftype) == "" {
- if len(path.Ext(name)) > 0 {
- ftype = path.Ext(name)[1:]
- }
- }
- }
- S.Push(lua.LString(url))
- S.Push(lua.LString(name))
- S.Push(lua.LString(size))
- S.Push(lua.LString(ftype))
- S.Push(lua.LString(fid))
- return 5
- }))
- s.L.SetGlobal("downloadBase64File", s.L.NewFunction(func(S *lua.LState) int {
- url := S.ToString(-3)
- fileName := S.ToString(-2)
- base64Img := S.ToString(-1)
- if fileName == "" {
- fileName = "文件下载"
- }
- fileName = fileName + ".jpg"
- i := strings.Index(base64Img, ",")
- dec := base64.NewDecoder(base64.StdEncoding, strings.NewReader(base64Img[i+1:]))
- ret, err := io.ReadAll(dec)
- name, size, ftype, fid := "", "", "", ""
- if err == nil && len(ret) > 0 {
- url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, url, ret)
- }
- S.Push(lua.LString(url))
- S.Push(lua.LString(name))
- S.Push(lua.LString(size))
- S.Push(lua.LString(ftype))
- S.Push(lua.LString(fid))
- return 5
- }))
- //支持正则
- s.L.SetGlobal("regexp", s.L.NewFunction(func(S *lua.LState) int {
- index := int(S.ToNumber(-1))
- regstr := S.ToString(-2)
- text := S.ToString(-3)
- reg := regexp.MustCompile(regstr)
- reps := reg.FindAllStringSubmatchIndex(text, -1)
- ret := s.L.NewTable()
- number := 0
- for _, v := range reps {
- number++
- ret.Insert(number, lua.LString(text[v[index]:v[index+1]]))
- }
- S.Push(ret)
- return 1
- }))
- //支持替换
- s.L.SetGlobal("replace", s.L.NewFunction(func(S *lua.LState) int {
- n := S.ToInt(-4)
- if n <= 0 {
- n = -1
- }
- text := S.ToString(-3)
- old := S.ToString(-2)
- repl := S.ToString(-1)
- text = strings.Replace(text, old, repl, n)
- S.Push(lua.LString(text))
- return 1
- }))
- //标题的关键词、排除词过滤
- s.L.SetGlobal("pagefilterword", s.L.NewFunction(func(S *lua.LState) int {
- keyWordReg := regexp.MustCompile(sp.Config.Word["keyword"])
- notKeyWordReg := regexp.MustCompile(sp.Config.Word["notkeyword"])
- data := S.ToTable(-1)
- dataMap := sp.TableToMap(data)
- ret := s.L.NewTable()
- num := 1
- for _, v := range dataMap {
- tmp := v.(map[string]interface{})
- isOk := false
- if title := qu.ObjToString(tmp["title"]); title != "" {
- if keyWordReg.MatchString(title) && !notKeyWordReg.MatchString(title) {
- isOk = true
- }
- }
- if isOk {
- ret.Insert(num, sp.MapToLuaTable(S, tmp))
- num++
- }
- }
- S.Push(ret)
- return 1
- }))
- //标题的关键词、排除词过滤
- s.L.SetGlobal("detailfilterword", s.L.NewFunction(func(S *lua.LState) int {
- keyWordReg := regexp.MustCompile(sp.Config.Word["keyword"])
- notKeyWordReg := regexp.MustCompile(sp.Config.Word["notkeyword"])
- data := S.ToTable(-1)
- dataMap := sp.TableToMap(data)
- if title := qu.ObjToString(dataMap["title"]); title != "" {
- if keyWordReg.MatchString(title) && !notKeyWordReg.MatchString(title) {
- S.Push(lua.LBool(true))
- return 1
- } else {
- qu.Debug(s.SCode, dataMap["href"], " title error")
- }
- } else {
- qu.Debug(s.SCode, dataMap["href"], " title error")
- }
- S.Push(lua.LBool(false))
- return 1
- }))
- //detail过滤
- s.L.SetGlobal("filterdetail", s.L.NewFunction(func(S *lua.LState) int {
- /*
- 1.长度判断 (特殊处理:详情请访问原网页!;详见原网页;见原网页;无;无相关内容;无正文内容)
- 2.是否含汉字
- */
- reg1 := regexp.MustCompile("(原网页|无|无相关内容|无正文内容|详见附件|见附件)")
- reg2 := regexp.MustCompile("[\u4e00-\u9fa5]")
- detail := S.ToString(-1)
- if reg1.MatchString(detail) {
- S.Push(lua.LBool(true))
- return 1
- }
- if len([]rune(detail)) < 50 || !reg2.MatchString(detail) {
- S.Push(lua.LBool(false))
- return 1
- }
- S.Push(lua.LBool(false))
- return 1
- }))
- //匹配汉字
- s.L.SetGlobal("matchan", s.L.NewFunction(func(S *lua.LState) int {
- reg1 := regexp.MustCompile("(见附件|详见附件)")
- reg2 := regexp.MustCompile("[\u4e00-\u9fa5]")
- detail := S.ToString(-1)
- detail = reg1.ReplaceAllString(detail, "")
- ok := reg2.MatchString(detail)
- S.Push(lua.LBool(ok))
- return 1
- }))
- //base64加密
- s.L.SetGlobal("encodeBase64", s.L.NewFunction(func(S *lua.LState) int {
- text := S.ToString(-1)
- base64Text := base64.StdEncoding.EncodeToString([]byte(text))
- S.Push(lua.LString(base64Text))
- return 1
- }))
- //base64解密
- s.L.SetGlobal("decodeBase64", s.L.NewFunction(func(S *lua.LState) int {
- text := S.ToString(-1)
- result := ""
- byteText, err := base64.StdEncoding.DecodeString(text)
- if err == nil {
- result = string(byteText)
- }
- S.Push(lua.LString(result))
- return 1
- }))
- //GB2312字符集解码
- s.L.SetGlobal("decodeGB2312", s.L.NewFunction(func(S *lua.LState) int {
- text := S.ToString(-1)
- result := ""
- decodedString, _, err := transform.String(simplifiedchinese.GB18030.NewDecoder(), text)
- if err == nil {
- result = decodedString
- }
- S.Push(lua.LString(result))
- return 1
- }))
- //aes cbc模式加密
- s.L.SetGlobal("aesEncryptCBC", s.L.NewFunction(func(S *lua.LState) int {
- origData := S.ToString(-3)
- key := S.ToString(-2)
- iv := S.ToString(-1)
- bytekey := []byte(key)
- byteorigData := []byte(origData)
- byteiv := []byte(iv)
- encrypted := sp.AesCBCEncrypt(byteorigData, bytekey, byteiv)
- // 将加密后的数据和初始向量进行Base64编码
- result := base64.StdEncoding.EncodeToString(encrypted)
- S.Push(lua.LString(result))
- return 1
- }))
- //aes cbc模式解密
- s.L.SetGlobal("aesDecryptCBC", s.L.NewFunction(func(S *lua.LState) int {
- origData := S.ToString(-3)
- key := S.ToString(-2)
- iv := S.ToString(-1)
- bytekey := []byte(key)
- byteiv := []byte(iv)
- data, _ := base64.StdEncoding.DecodeString(origData)
- result := sp.AesCBCDecrypter(data, bytekey, byteiv)
- S.Push(lua.LString(result))
- return 1
- }))
- //aes ecb模式加密
- s.L.SetGlobal("aesEncryptECB", s.L.NewFunction(func(S *lua.LState) int {
- origData := S.ToString(-2)
- key := S.ToString(-1)
- bytekey := []byte(key)
- byteorigData := []byte(origData)
- encrypted := sp.AesECBEncrypt(byteorigData, bytekey)
- result := base64.StdEncoding.EncodeToString(encrypted)
- S.Push(lua.LString(result))
- return 1
- }))
- //aes ecb模式解密
- s.L.SetGlobal("aesDecryptECB", s.L.NewFunction(func(S *lua.LState) int {
- origData := S.ToString(-2)
- key := S.ToString(-1)
- data, _ := base64.StdEncoding.DecodeString(origData)
- result := sp.AesECBDecrypter(data, []byte(key))
- S.Push(lua.LString(result))
- return 1
- }))
- //des ecb模式加密
- s.L.SetGlobal("desEncryptECB", s.L.NewFunction(func(S *lua.LState) int {
- origData := S.ToString(-2)
- key := S.ToString(-1)
- encrypted := sp.DesECBEncrypt([]byte(origData), []byte(key))
- result := base64.StdEncoding.EncodeToString(encrypted)
- S.Push(lua.LString(result))
- return 1
- }))
- //des ecb模式解密
- s.L.SetGlobal("desDecryptECB", s.L.NewFunction(func(S *lua.LState) int {
- origData := S.ToString(-2)
- key := S.ToString(-1)
- data, _ := base64.StdEncoding.DecodeString(origData)
- result := sp.DesECBDecrypter(data, []byte(key))
- S.Push(lua.LString(result))
- return 1
- }))
- //des cbc模式加密
- s.L.SetGlobal("desEncryptCBC", s.L.NewFunction(func(S *lua.LState) int {
- origData := S.ToString(-3)
- key := S.ToString(-2)
- iv := S.ToString(-1)
- bytekey := []byte(key)
- byteorigData := []byte(origData)
- byteiv := []byte(iv)
- encrypted := sp.DesCBCEncrypt(byteorigData, bytekey, byteiv)
- result := base64.StdEncoding.EncodeToString(encrypted)
- S.Push(lua.LString(result))
- return 1
- }))
- //des cbc模式解密
- s.L.SetGlobal("desDecryptCBC", s.L.NewFunction(func(S *lua.LState) int {
- origData := S.ToString(-3)
- key := S.ToString(-2)
- iv := S.ToString(-1)
- bytekey := []byte(key)
- byteiv := []byte(iv)
- data, _ := base64.StdEncoding.DecodeString(origData)
- result := sp.DesCBCDecrypter(data, bytekey, byteiv)
- S.Push(lua.LString(result))
- return 1
- }))
- //rsa 公钥加密
- s.L.SetGlobal("rsaEncrypt", s.L.NewFunction(func(S *lua.LState) int {
- origData := S.ToString(-2)
- key := S.ToString(-1)
- encrypted := sp.EncryptWithPublicKey([]byte(origData), []byte(key))
- result := base64.StdEncoding.EncodeToString(encrypted)
- S.Push(lua.LString(result))
- return 1
- }))
- //rsa 私钥解密
- s.L.SetGlobal("rsaDecrypt", s.L.NewFunction(func(S *lua.LState) int {
- origData := S.ToString(-2)
- key := S.ToString(-1)
- data, _ := base64.StdEncoding.DecodeString(origData)
- result := sp.DecryptWithPrivateKey(data, []byte(key))
- S.Push(lua.LString(result))
- return 1
- }))
- //根据正文获取发布时间
- s.L.SetGlobal("getPublishtime", s.L.NewFunction(func(S *lua.LState) int {
- detail := S.ToString(-2)
- contenthtml := S.ToString(-1)
- publishtime := sp.GetPublishtime([]string{contenthtml, detail})
- S.Push(lua.LString(publishtime))
- return 1
- }))
- //匹配
- s.L.SetGlobal("stringFind", s.L.NewFunction(func(S *lua.LState) int {
- regstr := S.ToString(-1)
- text := S.ToString(-2)
- reg := regexp.MustCompile(regstr)
- result := reg.FindString(text)
- isMatch := false
- if result != "" {
- isMatch = true
- }
- S.Push(lua.LString(result))
- S.Push(lua.LBool(isMatch))
- return 2
- }))
- //截取
- s.L.SetGlobal("stringSub", s.L.NewFunction(func(S *lua.LState) int {
- text := S.ToString(-3)
- start := S.ToInt(-2)
- end := S.ToInt(-1)
- result := ""
- if len(text) > 0 {
- textRune := []rune(text)
- textLen := len(textRune)
- if end < 0 {
- if start > 0 { //正向截取到倒数第end位
- result = string(textRune[start-1 : textLen+1+end])
- } else if start < 0 { //反向截取 从倒数第start位截取到倒数第end位
- result = string(textRune[textLen+start : textLen+1+end])
- }
- } else if start > 0 && end >= start && end <= textLen { //从第start个截取到第end个
- result = string(textRune[start-1 : end])
- }
- // if end == -1 {
- // if start >= 1 { //正向截取到结尾
- // result = string(textRune[start-1:])
- // } else if start < 0 && textLen+start >= 0 { //反向截取后缀
- // result = string(textRune[textLen+start:])
- // }
- // } else if start >= 1 && end <= textLen { //从第start个截取到第end个
- // result = string(textRune[start-1 : end])
- // }
- }
- S.Push(lua.LString(result))
- return 1
- }))
- //长度
- s.L.SetGlobal("stringLen", s.L.NewFunction(func(S *lua.LState) int {
- text := S.ToString(-1)
- textLen := len([]rune(text))
- S.Push(lua.LNumber(textLen))
- return 1
- }))
- //去除特殊标签中间内容
- s.L.SetGlobal("getPureContent", s.L.NewFunction(func(S *lua.LState) int {
- con := S.ToString(-1)
- reg := regexp.MustCompile("(?s)<(!%-%-|!--|style).*?(%-%-|--|style)>") //注释 css
- con = reg.ReplaceAllString(con, "")
- // indexArr := reg.FindAllStringIndex(con, -1)
- // for i := len(indexArr) - 1; i >= 0; i-- {
- // if index := indexArr[i]; len(index) == 2 {
- // con = con[:index[0]] + con[index[1]:]
- // }
- // }
- S.Push(lua.LString(con))
- return 1
- }))
- //interface转string
- s.L.SetGlobal("formatToString", s.L.NewFunction(func(S *lua.LState) int {
- strNum := S.ToString(-1)
- decimalNum, _ := decimal.NewFromString(strNum)
- S.Push(lua.LString(decimalNum.String()))
- return 1
- }))
- //获取验证码
- s.L.SetGlobal("getCodeByPath", s.L.NewFunction(func(S *lua.LState) int {
- cookie := S.ToString(-1)
- head := S.ToTable(-2)
- stype := S.ToString(-3)
- path := S.ToString(-4)
- proxy := S.ToBool(-5)
- headMap := sp.GetTable(head)
- //qu.Debug("cookie----------", cookie)
- //qu.Debug("headMap----------", headMap)
- headJsonStr := ""
- headByte, err := json.Marshal(headMap)
- if err == nil {
- headJsonStr = string(headByte)
- }
- code, respHead, respCookie := codegrpc.GetCodeByPath(path, stype, headJsonStr, cookie, proxy)
- qu.Debug("GetCodeByPath code====", code)
- //qu.Debug("respHead====", respHead)
- //qu.Debug("respCookie====", respCookie)
- S.Push(lua.LString(code))
- respHeadMap := map[string]interface{}{}
- json.Unmarshal([]byte(respHead), &respHeadMap)
- hTable := sp.MapToLuaTable(S, respHeadMap)
- S.Push(hTable)
- S.Push(lua.LString(respCookie))
- return 3
- }))
- s.L.SetGlobal("goRunJs", s.L.NewFunction(func(S *lua.LState) int {
- param := S.ToString(-2) //list or detail
- step := S.ToString(-1) //参数
- result := gojs.GoRunJsGetResult(s.SCode, param, step)
- qu.Debug("Go Run Js Result:", param, step, result)
- S.Push(lua.LString(result))
- return 1
- }))
- s.L.SetGlobal("newDownloadFile", s.L.NewFunction(func(S *lua.LState) int {
- cookie := S.ToString(-1)
- head := S.ToTable(-2)
- param := S.ToTable(-3)
- method := S.ToString(-4)
- url := S.ToString(-5)
- fileName := S.ToString(-6)
- ishttps := strings.Contains(url, "https")
- var mycookie []*http.Cookie
- if cookie != "{}" {
- json.Unmarshal([]byte(cookie), &mycookie)
- } else {
- mycookie = make([]*http.Cookie, 0)
- }
- fileName = strings.TrimSpace(fileName)
- url = strings.TrimSpace(url)
- ret := NewDownloadFile(s.Downloader, url, method, sp.GetTable(param), sp.GetTable(head), mycookie, s.Encoding, false, ishttps, "", s.Timeout, false)
- name, size, ftype, fid := "", "", "", ""
- qu.Debug(GarbledCodeReg.FindAllString(string(ret), -1), len(ret))
- if ret == nil || len(ret) < 1024*3 {
- qu.Debug("下载文件出错!")
- } else {
- ftype = qu.GetFileType(ret)
- if (ftype == "docx" || ftype == "doc") && len(GarbledCodeReg.FindAllString(string(ret), -1)) > 10 {
- url, name, size, ftype, fid = "附件中含有乱码", "附件中含有乱码", "", "", ""
- } else {
- url, name, size, ftype, fid = sp.UploadFile(s.SCode, fileName, url, ret)
- }
- }
- if strings.TrimSpace(ftype) == "" {
- if len(path.Ext(name)) > 0 {
- ftype = path.Ext(name)[1:]
- }
- }
- S.Push(lua.LString(url))
- S.Push(lua.LString(name))
- S.Push(lua.LString(size))
- S.Push(lua.LString(ftype))
- S.Push(lua.LString(fid))
- return 5
- }))
- //渲染页面下载
- s.L.SetGlobal("downloadByRender", s.L.NewFunction(func(S *lua.LState) int {
- href := S.ToString(-1)
- contentHtml := sp.DownloadByRender(href)
- S.Push(lua.LString(contentHtml))
- return 1
- }))
- //渲染页面下载
- s.L.SetGlobal("getSimulateLoginInfo", s.L.NewFunction(func(S *lua.LState) int {
- param := S.ToTable(-1)
- header := S.ToTable(-2)
- byteParam, _ := json.Marshal(sp.TableToMap(param))
- headerParam, _ := json.Marshal(sp.TableToMap(header))
- stype := S.ToString(-3) //login:登陆,get:获取cookie
- qu.Debug(string(headerParam))
- qu.Debug(string(byteParam))
- cookie := login.GetSimulateLoginInfo(*site, stype, string(headerParam), string(byteParam))
- S.Push(lua.LString(cookie))
- return 1
- }))
- //辽宁省招标投标监管网
- s.L.SetGlobal("multipartRequest", s.L.NewFunction(func(S *lua.LState) int {
- yzm := S.ToString(-5)
- verifyId := S.ToString(-4)
- id := S.ToString(-3)
- param := S.ToTable(-2)
- head := S.ToTable(-1)
- paramMap := sp.GetTable(param)
- headMap := sp.GetTable(head)
- qu.Debug(paramMap)
- qu.Debug(headMap)
- paramMap = map[string]interface{}{
- "baseUrl": "https://www.lntb.gov.cn/mhback",
- "realRequestUri": "/api/cTenderNoticeController/getDetail/" + id,
- "verifyId": verifyId,
- "result": yzm,
- }
- headMap = map[string]interface{}{
- "Referer": "",
- "sec-ch-ua-mobile": "?0",
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36",
- }
- //由于验证码识别不准确,加减法呢特殊处理
- for _, w := range []string{"+", "-"} {
- if arr := strings.Split(yzm, w); len(arr) == 2 {
- if w == "+" {
- yzm = fmt.Sprint(qu.IntAll(arr[0]) + qu.IntAll(arr[1]))
- } else {
- yzm = fmt.Sprint(qu.IntAll(arr[0]) - qu.IntAll(arr[1]))
- }
- }
- }
- qu.Debug(yzm, verifyId, id)
- payload := &bytes.Buffer{}
- writer := multipart.NewWriter(payload)
- //param
- for k, v := range paramMap {
- _ = writer.WriteField(k, v.(string))
- }
- err := writer.Close()
- if err != nil {
- qu.Debug(err)
- return 1
- }
- //request
- client := &http.Client{}
- req, err := http.NewRequest("POST", "https://www.lntb.gov.cn/mhback/kk-anti-reptile/validate", payload)
- //header
- for k, v := range headMap {
- req.Header.Add(k, v.(string))
- }
- req.Header.Set("Content-Type", writer.FormDataContentType())
- res, err := client.Do(req)
- if err != nil {
- qu.Debug(err)
- return 1
- }
- defer res.Body.Close()
- body, err := ioutil.ReadAll(res.Body)
- if err != nil {
- fmt.Println(err)
- return 1
- }
- result := false
- resultMap := map[string]bool{}
- if json.Unmarshal(body, &resultMap) == nil {
- result = resultMap["result"]
- }
- fmt.Println(string(body), resultMap)
- S.Push(lua.LBool(result))
- return 1
- }))
- //chromedp下载
- s.L.SetGlobal("downloadByChrome", s.L.NewFunction(func(S *lua.LState) int {
- timeout := S.ToInt64(-2)
- taskStr := S.ToString(-1)
- cam := sp.ChromeActionMap{}
- if json.Unmarshal([]byte(taskStr), &cam) == nil {
- if len(cam.BaseActions) > 0 {
- if len(cam.RangeActions) > 0 && cam.RangeTimes > 0 {
- for times := 1; times <= cam.RangeTimes; times++ {
- cam.BaseActions = append(cam.BaseActions, cam.RangeActions...)
- }
- }
- chromeTask := sp.ChromeTask{
- TimeOut: timeout,
- Actions: cam.BaseActions,
- }
- ret := DownloadByChrome(downloadnode, s.Downloader, chromeTask, s.Timeout)
- S.Push(sp.MapToTable(S, ret))
- } else {
- S.Push(lua.LString("未设置基础循环动作"))
- }
- } else {
- S.Push(lua.LString("chrome task json 格式化错误"))
- }
- return 1
- }))
- //针对中国招标投标公共服务平台三级页瑞数加密下载方法
- s.L.SetGlobal("downloadByDataIntercept", s.L.NewFunction(func(S *lua.LState) int {
- url := S.ToString(-4)
- url_regex := S.ToString(-3)
- timeout := S.ToInt(-2)
- proxy := S.ToBool(-1)
- headers := sp.DownloadByDataIntercept(url, url_regex, timeout, proxy)
- table := sp.MapToLuaTable(S, headers)
- S.Push(table)
- return 1
- }))
- }
- func (s *Script) Reload() {
- s.L.Close()
- site := ""
- s.LoadScript(&site, "", s.ScriptFile)
- }
// transUnic decodes \uXXXX escapes (exactly four hex digits) in str into
// UTF-8 and copies every other byte through unchanged.
//
// A high surrogate escape immediately followed by a low surrogate escape
// (for example \ud83d\ude00) is combined into a single code point. An
// unpaired surrogate is written as U+FFFD by bytes.Buffer.WriteRune.
//
// A backslash that does not start a well-formed escape is copied verbatim
// and scanning resumes at the next byte, so a valid escape that begins
// inside a malformed one is still decoded.
func transUnic(str string) string {
	var buf bytes.Buffer
	for i := 0; i < len(str); {
		r, ok := parseUnicEscape(str, i)
		if !ok {
			buf.WriteByte(str[i])
			i++
			continue
		}
		next := i + 6
		if r >= 0xD800 && r <= 0xDBFF {
			// Possible UTF-16 surrogate pair: look for the low half.
			if low, ok := parseUnicEscape(str, next); ok && low >= 0xDC00 && low <= 0xDFFF {
				r = 0x10000 + ((r - 0xD800) << 10) + (low - 0xDC00)
				next += 6
			}
		}
		buf.WriteRune(r)
		i = next
	}
	return buf.String()
}

// parseUnicEscape reports the 16-bit value of a \uXXXX escape starting at
// byte offset pos of str, or false when no well-formed escape starts there.
func parseUnicEscape(str string, pos int) (rune, bool) {
	if pos+6 > len(str) || str[pos] != '\\' || str[pos+1] != 'u' {
		return 0, false
	}
	v, err := strconv.ParseUint(str[pos+2:pos+6], 16, 16)
	if err != nil {
		return 0, false
	}
	return rune(v), true
}
- // 取得变量
- func (s *Script) GetVar(key string) string {
- return s.L.GetGlobal(key).String()
- }
- func (s *Script) GetIntVar(key string) int {
- lv := s.L.GetGlobal(key)
- if v, ok := lv.(lua.LNumber); ok {
- return int(v)
- }
- return -1
- }
- func (s *Script) GetBoolVar(key string) bool {
- lv := s.L.GetGlobal(key)
- if v, ok := lv.(lua.LBool); ok {
- return bool(v)
- }
- return false
- }
|