12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751 |
- --[[
- 企明星爬虫系统,公共文件
- Author:a7
- Date:2016/4/7
- ]]
- local json=require "json"
-- Namespace table for the shared crawler helpers. It is created without
-- `local`, so it is also visible as the global `common`.
common = {}
--- Evaluate a JSON-like string as a Lua expression.
-- The text is rewritten by common.clearJson and then compiled as
-- `return <text>`.
-- NOTE(review): this executes whatever code `script` contains. Do not pass
-- untrusted page content here without sandboxing the compiled chunk.
-- @param script  JSON-like source text
-- @return the value of the expression, or nil when `script` is nil or the
--         rewritten text does not compile
function common.eval(script)
	if script == nil then
		return nil
	end
	-- loadstring only exists in Lua 5.1; newer versions accept strings in load.
	local compile = loadstring or load
	local chunk = compile("return " .. common.clearJson(script))
	if chunk == nil then
		return nil
	end
	return chunk()
end
--- Print the dump() rendering of a value.
-- @param obj  any value
function printf(obj)
	local rendered = dump(obj)
	print(rendered)
end
--- Render a value as Lua-literal-like text.
-- Tables whose keys are exactly 1..n are printed as lists; any other table is
-- printed as `[key] = value` lines in pairs() order. Nested tables are
-- indented with one tab per level. A table that contains itself (directly or
-- through a descendant) is printed as `<cycle>` at the point of recursion
-- instead of recursing forever.
-- @param obj  any value
-- @return the rendered text for tables, strings and other values; a bare
--         number is returned unchanged (as a number)
function dump(obj)
	-- Forward declaration: wrapVal and dumpObj call each other.
	local dumpObj
	-- Tables on the current recursion path. Entries are cleared on the way
	-- out, so a table referenced twice without a cycle is still printed twice.
	local active = {}

	local function getIndent(level)
		return string.rep("\t", level)
	end

	local function quoteStr(str)
		-- Control characters missing from the table are left unchanged.
		str = string.gsub(str, "[%c\\\"]", {
			["\t"] = "\\t",
			["\r"] = "\\r",
			["\n"] = "\\n",
			["\""] = "\\\"",
			["\\"] = "\\\\",
		})
		return '"' .. str .. '"'
	end

	local function wrapKey(val)
		if type(val) == "number" then
			return "[" .. val .. "]"
		elseif type(val) == "string" then
			return "[" .. quoteStr(val) .. "]"
		end
		return "[" .. tostring(val) .. "]"
	end

	local function wrapVal(val, level)
		local kind = type(val)
		if kind == "table" then
			return dumpObj(val, level)
		elseif kind == "number" then
			return val
		elseif kind == "string" then
			return quoteStr(val)
		end
		return tostring(val)
	end

	-- Returns true plus the element count when the keys are exactly 1..count.
	local function isArray(arr)
		local count = 0
		for _ in pairs(arr) do
			count = count + 1
		end
		for i = 1, count do
			if arr[i] == nil then
				return false
			end
		end
		return true, count
	end

	dumpObj = function(value, level)
		if type(value) ~= "table" then
			return wrapVal(value)
		end
		if active[value] then
			return "<cycle>"
		end
		active[value] = true

		level = level + 1
		local tokens = { "{" }
		local ret, count = isArray(value)
		if ret then
			for i = 1, count do
				tokens[#tokens + 1] = getIndent(level) .. wrapVal(value[i], level) .. ","
			end
		else
			for k, v in pairs(value) do
				tokens[#tokens + 1] = getIndent(level) .. wrapKey(k) .. " = " .. wrapVal(v, level) .. ","
			end
		end
		tokens[#tokens + 1] = getIndent(level - 1) .. "}"

		active[value] = nil
		return table.concat(tokens, "\n")
	end

	return dumpObj(obj, 0)
end
--- Rewrite JSON-like text towards Lua table-constructor syntax.
-- Every "[" becomes "{", every "]" becomes "}", and each `"key":` becomes
-- `key=`. The substitutions are purely textual, so they also apply to
-- brackets and `"...":` sequences that appear inside string values.
-- @param json  source text
-- @return the rewritten text
function common.clearJson(json)
	local substitutions = {
		{ "%[", "{" },                -- array open
		{ "%]", "}" },                -- array close
		{ "\"([^\"]*)\":", "%1=" },   -- quoted key followed by a colon
	}
	local text = json
	for _, rule in ipairs(substitutions) do
		text = string.gsub(text, rule[1], rule[2])
	end
	return text
end
--- Replace common HTML character entities with the characters they stand for.
-- NOTE(review): the entity names below are the standard spellings for the
-- five target characters ("<", ">", "'", "&" and the double quote); confirm
-- them against the upstream copy of this file.
-- @param c  text that may contain entities
-- @return the text with the entities replaced
function common.replaceEscString(c)
	c = string.gsub(c, "&lt;", "<")
	c = string.gsub(c, "&gt;", ">")
	c = string.gsub(c, "&#39;", "'")
	c = string.gsub(c, "&apos;", "'")
	c = string.gsub(c, "&quot;", "\"")
	c = string.gsub(c, "&#34;", "\"")
	-- "&amp;" is handled last so that "&amp;lt;" becomes "&lt;" rather than
	-- being decoded a second time into "<".
	c = string.gsub(c, "&amp;", "&")
	return c
end
--- Current local date and time.
-- @return a string formatted as "yyyy-MM-dd HH:mm:ss"
function common.nowDate()
	local now = os.time()
	return os.date("%Y-%m-%d %H:%M:%S", now)
end
-- English month names and abbreviations mapped to two-digit month numbers.
-- Declared without `local`, so it is a global table.
monthmap = {
	["Jan"] = "01", ["Feb"] = "02", ["Mar"] = "03", ["Apr"] = "04",
	["May"] = "05", ["June"] = "06", ["Jun"] = "06", ["July"] = "07",
	["Jul"] = "07", ["Aug"] = "08", ["Sept"] = "09", ["Sep"] = "09",
	["Oct"] = "10", ["Nov"] = "11", ["Dec"] = "12",
}

--- Convert a six-field timestamp to "yyyy-MM-dd HH:mm:ss".
-- The fields are read positionally: field 2 is the month name, field 3 the
-- day, field 4 the clock time and field 6 the year, for example
-- "Thu Apr 07 12:00:00 CST 2016". Fields may be separated by any run of
-- whitespace, and a one-digit day is zero-padded.
-- @param strtime  timestamp text
-- @return the reformatted string, or "0" (the failure value common.parseDate
--         also uses) when the input is not a string, has fewer than six
--         fields, or names an unknown month
function common.timeStrByCST(strtime)
	if type(strtime) ~= "string" then
		return "0"
	end
	local month, day, clock, year = string.match(
		strtime, "^%s*%S+%s+(%S+)%s+(%S+)%s+(%S+)%s+%S+%s+(%S+)")
	local monthNumber = month and monthmap[month]
	if monthNumber == nil then
		return "0"
	end
	if string.len(day) < 2 then
		day = "0" .. day
	end
	return year .. "-" .. monthNumber .. "-" .. day .. " " .. clock
end
--- Build a "yyyy-MM-dd HH:mm:ss" string from the digit groups in a date text.
-- Separators are ignored: the digit runs found in `datestr` are taken in order
-- and interpreted according to `datetype`:
--   "MMdd"            month, day (current year, time 00:00:00)
--   "yyyyMMdd"        year, month, day (time of day is the current time)
--   "yyyyMMddHHmm"    year, month, day, hour, minute (seconds are 00)
--   "yyyyMMddHHmmss"  year, month, day, hour, minute, second
-- Month, day, hour, minute and second are zero-padded to two digits so the
-- result keeps the fixed width that common.strToTimestamp reads.
-- @param datestr   text containing the date digits
-- @param datetype  one of the format names above
-- @return the formatted string, or "0" when `datestr` is nil, holds too few
--         digit groups for `datetype`, or `datetype` is not recognised
function common.parseDate(datestr, datetype)
	if datestr == nil then
		return "0"
	end

	local parts = {}
	for digits in string.gmatch(datestr, "(%d+)") do
		parts[#parts + 1] = digits
	end
	-- Every supported format needs at least two digit groups.
	if #parts < 2 then
		return "0"
	end

	local pad = common.padDigital

	if datetype == "MMdd" then
		return tostring(os.date("%Y", os.time())) .. "-" .. pad(parts[1]) .. "-" .. pad(parts[2]) .. " 00:00:00"
	end

	if parts[3] == nil then
		return "0"
	end
	local day = parts[1] .. "-" .. pad(parts[2]) .. "-" .. pad(parts[3])
	if datetype == "yyyyMMdd" then
		return day .. os.date(" %H:%M:%S", os.time())
	end

	if parts[4] == nil or parts[5] == nil then
		return "0"
	end
	local hourMinute = pad(parts[4]) .. ":" .. pad(parts[5])
	if datetype == "yyyyMMddHHmm" then
		return day .. " " .. hourMinute .. ":00"
	end
	if datetype == "yyyyMMddHHmmss" and parts[6] ~= nil then
		return day .. " " .. hourMinute .. ":" .. pad(parts[6])
	end

	return "0"
end
--- Left-pad a value with "0" when it is shorter than two characters.
-- @param src  digit text
-- @return `src` unchanged when its length is two or more, otherwise "0" .. src
function common.padDigital(src)
	if string.len(src) >= 2 then
		return src
	end
	return "0" .. src
end
- --local datestr="2016年05月12日22:05:04"
- --print(parseDate(datestr,"yyyyMMddHHmm"))
- --print(parseDate("4月5日","MMdd"))
--- Convert a "yyyy-MM-dd HH:mm:ss" string to a timestamp via os.time.
-- The six fields are read from fixed character positions; the separator
-- characters between them are not inspected.
-- @param str  date text, at least 19 characters long
-- @return the os.time result, or 0 when `str` is nil, shorter than 19
--         characters, or has a field that is not a number
function common.strToTimestamp(str)
	if str == nil or string.len(str) < 19 then
		return 0
	end

	-- {first, last} character positions of year, month, day, hour, minute, second.
	local spans = { {1, 4}, {6, 7}, {9, 10}, {12, 13}, {15, 16}, {18, 19} }
	local fields = {}
	for i, span in ipairs(spans) do
		local number = tonumber(string.sub(str, span[1], span[2]))
		if number == nil then
			-- os.time raises on a missing field; report failure with 0 instead.
			return 0
		end
		fields[i] = number
	end

	return os.time{
		year = fields[1], month = fields[2], day = fields[3],
		hour = fields[4], min = fields[5], sec = fields[6],
	}
end
--- Remove every carriage return, line feed, space and tab from a string.
-- Despite the name this strips whitespace everywhere in the string, not only
-- at the ends.
-- @param s  text, or nil
-- @return the text without those characters ("" for nil); exactly one value
function common.trim(s)
	if s == nil then
		return ""
	end
	-- "|" is not alternation inside a Lua character set: listing it there
	-- would delete literal pipe characters, so the set holds only whitespace.
	-- The outer parentheses drop gsub's second result (the match count).
	return (string.gsub(s, "[\r\n \t]+", ""))
end
--- Split a string on a delimiter.
-- `delimiter` is inserted into a Lua pattern unescaped, so pattern magic
-- characters in it keep their special meaning.
-- @param str        text to split
-- @param delimiter  separator (Lua pattern)
-- @return a list of the pieces; an empty list when `str` is nil or "" or
--         `delimiter` is nil
function common.split(str, delimiter)
	local pieces = {}
	if str == nil or str == '' or delimiter == nil then
		return pieces
	end

	-- Appending the delimiter lets the last piece match like all the others.
	local padded = str .. delimiter
	for piece in string.gmatch(padded, "(.-)" .. delimiter) do
		pieces[#pieces + 1] = piece
	end
	return pieces
end
--- string.match that never returns nil.
-- @param con  text to search
-- @param reg  Lua pattern
-- @return the first value string.match yields, or "" when nothing matches
function common.regTab(con, reg)
	local first = string.match(con, reg)
	return first == nil and "" or first
end
--- Check that the listed fields of a record are neither nil nor "".
-- @param tab1  table whose values are the field names to check
-- @param tab2  record to validate
-- @return true when every field is filled in, otherwise false; followed by a
--         string with one "<field>:值空," entry per empty field
function common.checkData(tab1, tab2)
	local missing = {}
	for _, field in pairs(tab1) do
		local value = tab2[field]
		if value == nil or value == "" then
			missing[#missing + 1] = field .. ":值空" .. ","
		end
	end
	return #missing == 0, table.concat(missing)
end
--- Decode percent-escapes ("%XX") in a string.
-- "+" is left untouched.
-- @param s  encoded text, or nil
-- @return the decoded text ("" for nil)
function common.decodeURI(s)
	if s == nil then
		return ""
	end
	local function fromHex(hex)
		return string.char(tonumber(hex, 16))
	end
	local decoded = string.gsub(s, '%%(%x%x)', fromHex)
	return decoded
end
--- Percent-encode a string for use in a URL.
-- Letters, digits, "." and "-" are kept, spaces become "+", and every other
-- byte becomes "%XX" in upper-case hex.
-- @param s  text to encode, or nil
-- @return the encoded text ("" for nil); exactly one value
function common.encodeURI(s)
	if s == nil then
		return ""
	end
	s = string.gsub(s, "([^%w%.%- ])", function(c)
		return string.format("%%%02X", string.byte(c))
	end)
	-- The outer parentheses drop gsub's second result (the match count), which
	-- would otherwise be passed on as an extra argument or table element.
	return (string.gsub(s, " ", "+"))
end
--- Resolve a link found on a page against that page's URL.
-- Cases, in order:
--   * `href` starts with "http" (any case): returned unchanged.
--   * `href` starts with "..": one trailing component of the page path is
--     dropped per piece that common.split(href, "%./") yields (the host is
--     never dropped); the last piece is appended.
--   * `href` starts with "./", or with neither "." nor "/": resolved against
--     the directory of the page.
--   * anything else (for example "/path"): resolved against the host.
-- The scheme of the result is "https://" when `channel` starts with "https"
-- (any case) and "http://" otherwise.
-- @param channel  URL of the page the link was found on
-- @param href     link as written in the page
-- @return the absolute URL
function common.gethref(channel, href)
	local scheme = "http://"
	if string.lower(string.sub(channel, 1, 5)) == "https" then
		scheme = "https://"
	end

	if string.lower(string.sub(href, 1, 4)) == "http" then
		return href
	end

	-- Joins path components, each followed by "/".
	local function joinWithSlash(parts)
		local joined = ""
		for i = 1, #parts do
			joined = joined .. parts[i] .. "/"
		end
		return joined
	end

	-- Host and path without the scheme. A channel that has no "http(s)://"
	-- part is used as-is instead of turning into nil.
	local hostAndPath = channel:match("https?://(.*)$") or channel
	local channelpath = common.split(hostAndPath, "/")

	local firstChar = string.sub(href, 1, 1)
	if firstChar ~= "." and firstChar ~= "/" then
		href = "./" .. href
	end

	local lead = string.sub(href, 1, 2)
	if lead == ".." then
		-- Splitting on the pattern "%./" yields one "." piece per "../" and then
		-- the target, which still carries the "%" of the appended delimiter.
		local infopath = common.split(href, "%./")
		for _ = 1, #infopath do
			if #channelpath <= 1 then
				break
			end
			-- table.remove without a position removes the last element on every
			-- Lua version; a position of -1 is a no-op or an error on some.
			table.remove(channelpath)
		end
		local infourl = infopath[#infopath]
		-- Drop the trailing "%" left over from the split.
		return scheme .. joinWithSlash(channelpath) .. string.sub(infourl, 1, string.len(infourl) - 1)
	end

	if lead == "./" then
		-- Drop the page's file name, but keep the host when the channel is a
		-- bare "http://host" with no path.
		if #channelpath > 1 then
			table.remove(channelpath)
		end
		return scheme .. joinWithSlash(channelpath) .. string.sub(href, 3)
	end

	local host = channelpath[1] or ""
	if string.sub(href, 1, 1) == "/" then
		return scheme .. host .. href
	end
	return scheme .. host .. "/" .. href
end
--- Split a string on a delimiter; returns nil for unusable input.
-- `delimiter` is inserted into a Lua pattern unescaped.
-- @param str        text to split
-- @param delimiter  separator (Lua pattern)
-- @return a list of the pieces, or nil when `str` is nil or "" or
--         `delimiter` is nil
function common.splitf(str, delimiter)
	local unusable = (str == nil) or (str == '') or (delimiter == nil)
	if unusable then
		return nil
	end

	local pieces = {}
	local padded = str .. delimiter
	for piece in string.gmatch(padded, "(.-)" .. delimiter) do
		pieces[#pieces + 1] = piece
	end
	return pieces
end
--- Compare values extracted from a page against expected values.
-- `update` holds one rule per line in the form `<selector>==<expected>`. Each
-- selector is looked up with findMap (not defined in this file; presumably
-- supplied by the host runtime) and the result is compared with the expected
-- text. Lines without "==" and blank lines are skipped.
-- @param content  page content passed to findMap
-- @param update   rule text, or nil / ""
-- @return -1 when at least one rule does not match, otherwise 0 (also when
--         there are no rules)
function common.checkUpdate(content, update)
	if update == "" or update == nil then
		return 0
	end

	local mismatch = false
	for _, rule in pairs(common.splitf(update, "\n")) do
		-- splitf returns nil for an empty line, for example after a trailing
		-- line break.
		local parts = common.splitf(rule, "==")
		if parts ~= nil and #parts > 1 then
			local found = findMap({ ["tmp"] = parts[1] }, content)["tmp"]
			if found ~= parts[2] then
				mismatch = true
			end
		end
	end

	if mismatch then
		return -1
	end
	return 0
end
--- Collect the link texts of anchors whose href ends in an attachment suffix.
-- findListHtml and findOneText are not defined in this file (presumably
-- supplied by the host runtime).
-- @param href     unused
-- @param content  page HTML
-- @return a table mapping the anchor's index in findListHtml("a", content) to
--         its text; anchors that do not qualify leave holes
function common.getEnclosureTitle(href, content)
	local suffixes = {
		"%.jpg$", "%.JPG$", "%.bid$", "%.pdf$", "%.doc$",
		"%.docx$", "%.xls$", "%.xlsx$", "%.zip$", "%.rar$",
	}
	local fileTitles = {}

	local linkList = findListHtml("a", content)
	for k in pairs(linkList) do
		local selector = "a:eq(" .. tostring(k - 1) .. ")"
		-- Evaluate the href selector once per anchor rather than once per suffix.
		local link = findOneText(selector .. ":attr(href)", content)
		for _, suffix in ipairs(suffixes) do
			if string.find(link, suffix) ~= nil then
				local title = findOneText(selector, content)
				fileTitles[k] = title
				break
			end
		end
	end
	return fileTitles
end
--- Collect absolute attachment links, selecting anchors by their list entry.
-- An anchor qualifies when its entry in findListHtml("a", content) ends in one
-- of the attachment suffixes. Its href is made absolute with common.gethref
-- unless it already contains "http", and backslashes become "/".
-- @param href     URL of the page, used to resolve relative links
-- @param content  page HTML
-- @return a table mapping the anchor's list index to its URL; anchors that do
--         not qualify leave holes
function common.getEnclosureHref(href, content)
	local endings = {
		"%.jpg$", "%.JPG$", "%.bid$", "%.pdf$", "%.doc$",
		"%.docx$", "%.xls$", "%.xlsx$", "%.zip$", "%.rar$",
	}
	local hrefs = {}

	for k, v in pairs(findListHtml("a", content)) do
		local qualifies = false
		for _, ending in ipairs(endings) do
			if string.find(v, ending) ~= nil then
				qualifies = true
				break
			end
		end

		if qualifies then
			local link = findOneText("a:eq(" .. tostring(k - 1) .. "):attr(href)", content)
			if string.find(link, "http") == nil then
				link = common.gethref(href, link)
			end
			hrefs[k] = (string.gsub(link, "\\", "/"))
		end
	end
	return hrefs
end
--- Collect absolute attachment links, selecting anchors by their href.
-- An anchor qualifies when its href attribute ends in one of the attachment
-- suffixes. The href is made absolute with common.gethref unless it already
-- contains "http", and backslashes become "/".
-- findListHtml and findOneText are not defined in this file (presumably
-- supplied by the host runtime).
-- @param href     URL of the page, used to resolve relative links
-- @param content  page HTML
-- @return a table mapping the anchor's index in findListHtml("a", content) to
--         its URL; anchors that do not qualify leave holes
function common.getEnclosureHrefByList(href, content)
	local suffixes = {
		"%.jpg$", "%.JPG$", "%.bid$", "%.pdf$", "%.doc$",
		"%.docx$", "%.xls$", "%.xlsx$", "%.zip$", "%.rar$",
	}
	local hrefs = {}

	local linkList = findListHtml("a", content)
	for k in pairs(linkList) do
		-- Evaluate the href selector once per anchor and reuse the result.
		local link = findOneText("a:eq(" .. tostring(k - 1) .. "):attr(href)", content)
		for _, suffix in ipairs(suffixes) do
			if string.find(link, suffix) ~= nil then
				if string.find(link, "http") == nil then
					link = common.gethref(href, link)
				end
				hrefs[k] = (string.gsub(link, "\\", "/"))
				break
			end
		end
	end
	return hrefs
end
--- Download a set of attachments.
-- Each link is fetched with downloadFile (not defined in this file). While the
-- returned url is "", the download is repeated, up to six attempts in total;
-- when the sixth attempt still yields "", the link is reported through
-- saveErrLog. Only downloads with a non-empty url and name are returned.
-- @param fileNameArray  table of file names, indexed like `fileLinkArray`
-- @param fileLinkArray  table of links
-- @return a list of {url, filename, size, ftype, fid} records
function common.getFileAttachmentsArray(fileNameArray, fileLinkArray)
	local attachments = {}
	for key, link in pairs(fileLinkArray) do
		local fileName = fileNameArray[key]
		local url, name, size, ftype, fid = downloadFile(fileName, link, "get", {}, {}, "")

		local attempts = 1
		while url == "" and attempts < 6 do
			url, name, size, ftype, fid = downloadFile(fileName, link, "get", {}, {}, "")
			attempts = attempts + 1
		end
		if attempts == 6 and url == "" then
			saveErrLog(link, "comm附件下载失败")
		end

		local hasUrl = url ~= nil and url ~= ""
		local hasName = name ~= nil and name ~= ""
		if hasUrl and hasName then
			attachments[#attachments + 1] = {
				["url"] = url,
				["filename"] = name,
				["size"] = size,
				["ftype"] = ftype,
				["fid"] = fid,
			}
		end
	end
	return attachments
end
--- Find and download every attachment linked from a page.
-- Titles come from common.getEnclosureTitle and links from
-- common.getEnclosureHrefByList, falling back to common.getEnclosureHref when
-- the former finds nothing. Entries whose title is "" are dropped before
-- downloading.
-- @param href     URL of the page, used to resolve relative links
-- @param content  page HTML
-- @return the list produced by common.getFileAttachmentsArray
function common.getFileAttachmentsArrayByHrefAndContent(href, content)
	local fileTitles = common.getEnclosureTitle(href, content)
	local fileLinks = common.getEnclosureHrefByList(href, content)
	-- Both tables are keyed by anchor index and may have holes, so emptiness
	-- is tested with next() rather than with a length.
	if next(fileLinks) == nil then
		fileLinks = common.getEnclosureHref(href, content)
	end

	-- Clear blank-titled entries in place. Assigning nil to an existing key is
	-- allowed during a pairs traversal, and unlike table.remove it neither
	-- shifts keys nor skips the entry that follows a removed one.
	for key, title in pairs(fileTitles) do
		if title == "" then
			fileTitles[key] = nil
			fileLinks[key] = nil
		end
	end

	return common.getFileAttachmentsArray(fileTitles, fileLinks)
end
-- Attachment suffixes, tried in list order. "docx" and "xlsx" come before
-- "doc" and "xls" so the longer suffix wins when matching is not anchored.
-- Declared without `local`, so it is a global table.
filetype={"jpg","JPG","bid","pdf","png","PDF","docx","doc","xlsx","xls","zip","rar","swf","DOCX","DOC","PDF","XLSX","XLS","ZIP","RAR","SWF"}

--- List the attachment links found inside one part of a page.
-- For every anchor under `tags`, the href is made absolute and its title has
-- whitespace removed. The anchor is kept when its href or its title contains
-- (or, with `withend`, ends in) "." followed by one of the `filetype`
-- suffixes, and its title is not "".
-- findOneHtml, findListHtml, findMap and transCode are not defined in this
-- file (presumably supplied by the host runtime).
-- @param href     URL of the page, used to resolve relative links
-- @param tags     selector of the enclosing element
-- @param content  page HTML
-- @param withend  true to require the suffix at the end of the href or title
-- @return a list of {href, title, ftype} records; `ftype` is the Lua pattern
--         "%.<suffix>" that matched
function common.getFilesLinkByTag(href, tags, content, withend)
	local dhtml = findOneHtml(tags, content)
	local alist = findListHtml(tags .. " a", content)
	local anchor = withend and "$" or ""
	local flist = {}

	-- NOTE(review): pairs is kept because `alist` comes from outside this file
	-- and is not known to be a hole-free sequence.
	for k in pairs(alist) do
		local item = findMap({
			["href"] = "a:eq(" .. tostring(k - 1) .. "):attr(href)",
			["title"] = "a:eq(" .. tostring(k - 1) .. ")",
		}, dhtml)
		item["title"] = common.trim(tostring(item["title"]))
		item["href"] = common.gethref(href, tostring(item["href"]))
		item["href"] = string.gsub(item["href"], "\\", "/")
		if string.find(item["href"], "http") == nil then
			item["href"] = transCode("utf8", item["href"])
		end

		-- ipairs guarantees list order, which the suffix priority relies on;
		-- pairs does not specify an order.
		local matched = false
		for _, ftype in ipairs(filetype) do
			local pattern = "%." .. ftype
			item["ftype"] = pattern
			if string.find(item["href"], pattern .. anchor)
				or string.find(item["title"], pattern .. anchor) then
				matched = true
				break
			end
		end

		if matched and item["title"] ~= "" then
			table.insert(flist, item)
		end
	end
	return flist
end
--- Download the attachments linked inside one part of a page.
-- Links come from common.getFilesLinkByTag. Each file is fetched with
-- downloadFile (not defined in this file); when the first attempt returns a
-- url of "", it is retried up to two more times and a final failure is
-- reported through saveErrLog. Every link yields one record: a full record on
-- success, or {filename, org_url} on failure.
-- @param href     URL of the page, used to resolve relative links
-- @param tags     selector of the enclosing element
-- @param content  page HTML
-- @param withend  passed to common.getFilesLinkByTag
-- @param param    optional value passed to downloadFile (default {})
-- @param head     optional value passed to downloadFile (default {})
-- @param ck       optional value passed to downloadFile (default "")
-- @return a list of attachment records
function common.getFileAttachmentsArrayWithTag(href, tags, content, withend, param, head, ck)
	-- Each optional argument gets its own default, so a missing `head` does
	-- not discard a supplied `param` or `ck`.
	if param == nil then
		param = {}
	end
	if head == nil then
		head = {}
	end
	if ck == nil then
		ck = ""
	end

	local attachments = {}
	local titleAndHrefList = common.getFilesLinkByTag(href, tags, content, withend)
	for _, v in ipairs(titleAndHrefList) do
		-- When the title carries text after the suffix, cut it off at the suffix.
		local file_name
		if string.find(v["title"], v["ftype"] .. "$") == nil then
			file_name = string.match(v["title"], "(.+" .. v["ftype"] .. ")")
		else
			file_name = v["title"]
		end

		-- Original address of the attachment.
		local init_url = v["href"]
		local url, name, size, ftype, fid = downloadFile(file_name, init_url, "get", param, head, ck)
		if url == "" then
			for attempt = 1, 2 do
				url, name, size, ftype, fid = downloadFile(file_name, init_url, "get", param, head, ck)
				if url ~= "" and size ~= "" then
					break
				end
				if attempt == 2 then
					saveErrLog(init_url, "comm附件下载失败")
				end
			end
		end

		if url == "" and size == "" then
			name = file_name
		end
		if type(url) ~= "string" then
			url = ""
		end

		if url ~= "" and name ~= nil and name ~= "" and size ~= "" then
			-- Download succeeded.
			table.insert(attachments, {
				["url"] = url,
				["filename"] = name,
				["size"] = size,
				["ftype"] = ftype,
				["fid"] = fid,
				["org_url"] = init_url,
			})
		else
			-- Download failed: keep the name and the original address.
			table.insert(attachments, {
				["filename"] = name,
				["org_url"] = init_url,
			})
		end
	end
	return attachments
end
--- Remove HTML comments ("<!-- ... -->") from a string.
-- Comments are removed one at a time from the left, re-scanning after each
-- removal. The closing "-->" is searched for from the opening marker onwards,
-- so a stray "-->" earlier in the text is ignored. An opening marker with no
-- closing marker after it is left in place, together with the rest of the
-- text.
-- @param content  HTML text
-- @return the text without complete comments
function common.getPureContent(content)
	local resContent = content
	local startChar = string.find(resContent, "<!--", 1, true)
	while startChar ~= nil do
		local _, endChar = string.find(resContent, "-->", startChar, true)
		if endChar == nil then
			-- Unterminated comment: stop instead of doing arithmetic on nil.
			break
		end
		resContent = string.sub(resContent, 1, startChar - 1) .. string.sub(resContent, endChar + 1)
		startChar = string.find(resContent, "<!--", 1, true)
	end
	return resContent
end
--- Extract an amount and its currency from a money text.
-- Whitespace, parentheses and commas (ASCII and full-width) are removed
-- first. The currency is "美元" when the text contains "美元" and not
-- "人民币"; otherwise it is "人民币". The first number in the remaining text
-- is taken, multiplied by 10000 when the text contains "万", and rounded to
-- two decimals.
-- @param orgStr  money text such as "人民币1,234.5万元"
-- @return the amount as a number (0 when the text holds no number), followed
--         by the currency name
function common.getMoneyAndType(orgStr)
	orgStr = common.trim(orgStr)

	-- "(" and ")" are pattern magic characters and must be escaped. The
	-- full-width forms are multi-byte sequences with no magic bytes.
	for _, junk in ipairs({ "%(", "%)", ",", "（", "）", "，" }) do
		orgStr = string.gsub(orgStr, junk, "")
	end

	local inTenThousands = string.find(orgStr, "万") ~= nil
	if inTenThousands then
		orgStr = string.gsub(orgStr, "万元", "")
		orgStr = string.gsub(orgStr, "万", "")
	end

	local moneyType = "人民币"
	if string.find(orgStr, "人民币") ~= nil then
		orgStr = string.gsub(orgStr, "人民币", "")
		orgStr = string.gsub(orgStr, "¥", "")
		orgStr = string.gsub(orgStr, "￥", "")
	elseif string.find(orgStr, "美元") ~= nil then
		orgStr = string.gsub(orgStr, "美元", "")
		-- A bare "$" is the end-of-string anchor; "%$" matches the character.
		orgStr = string.gsub(orgStr, "%$", "")
		moneyType = "美元"
	end

	local digits = string.match(orgStr, "%d+%.?%d*")
	local num = digits and tonumber(digits)
	if num == nil then
		return 0, moneyType
	end
	if inTenThousands then
		num = num * 10000
	end

	return tonumber(string.format("%.2f", num)), moneyType
end
--- Fill in the fields a record is expected to carry.
-- For "href", "title", "publishtime", "detail" and "contenthtml", a nil value
-- becomes "". For "jsondata", nil or an empty table becomes "{}" and a
-- non-empty table is replaced by its json.encode text. Other values are left
-- as they are.
-- @param data  record table, modified in place
-- @return the same table
function common.dataNil(data)
	local expected = {"jsondata","href","title","publishtime","detail","contenthtml"}
	for _, name in pairs(expected) do
		local current = data[name]
		if current == nil then
			if name == "jsondata" then
				data[name] = "{}"
			else
				data[name] = ""
			end
		elseif name == "jsondata" and type(current) == "table" then
			if next(current) ~= nil then
				data[name] = json.encode(current)
			else
				data[name] = "{}"
			end
		end
	end
	return data
end
--- Tell whether a link stays on the same site as the page it was found on.
-- The link is first made absolute with common.gethref. The same-site test is
-- delegated to common.isThisWeb, skipping the "https://" or "http://" prefix.
-- @param href      URL of the page
-- @param itemHref  link found on the page
-- @return the absolute link ("" when it could not be built), followed by
--         true when it points to the same site and false otherwise
function common.hrefInThisWeb(href, itemHref)
	-- Normalise the link.
	itemHref = common.gethref(href, itemHref)
	if itemHref == nil or itemHref == "" then
		return "", false
	end

	local sameSite = false
	if string.find(itemHref, "https") == 1 then
		-- Skip the 8 characters of "https://".
		sameSite = common.isThisWeb(href, itemHref, 9)
	elseif string.find(itemHref, "http") == 1 then
		-- Skip the 7 characters of "http://".
		sameSite = common.isThisWeb(href, itemHref, 8)
	end
	return itemHref, sameSite
end
--- Tell whether the domain of a link occurs in a page URL.
-- @param href      URL of the page
-- @param itemHref  absolute link
-- @param i         position in `itemHref` where the domain starts (just
--                  after the "://")
-- @return true when the text from `i` up to the next "/" is non-empty and
--         occurs literally in `href`, otherwise false
function common.isThisWeb(href, itemHref, i)
	-- Everything after the scheme prefix.
	local rest = string.sub(itemHref, i, string.len(itemHref))
	local domain = common.split(rest, "/")[1]
	if domain == nil or domain == "" then
		return false
	end
	-- Plain search: "." and "-" in a domain name are pattern magic characters
	-- and would otherwise stop a name such as "my-site.com" from matching itself.
	return string.find(href, domain, 1, true) ~= nil
end
-- End of the shared helpers: return the module table to whoever loaded this chunk.
return common
|