|
@@ -1,36 +1,27 @@
|
|
|
---[[抽取脚本工具类]]
|
|
|
+--[[
|
|
|
+企明星爬虫系统,公共文件
|
|
|
+Author:a7
|
|
|
+Date:2016/4/7
|
|
|
+]]
|
|
|
|
|
|
-- Module table: every helper below is attached to it and it is returned at the end of the file.
-- NOTE(review): it is assigned as a global, not a local; presumably other scripts reach it by name - confirm before changing.
common={}
|
|
|
|
|
|
---根据field获取结果对象
|
|
|
-function common.getFieldObjects(result,field)
|
|
|
- for key, val in pairs(result) do
|
|
|
- if key==field then
|
|
|
- return val
|
|
|
- end
|
|
|
- end
|
|
|
- return nil
|
|
|
-end
|
|
|
-
|
|
|
---抽取field新增结果赋值object(field,code,ruletext,extfrom,field,value,type,matchtype)
|
|
|
-function common.setFieldObjects(result,field,object)
|
|
|
- local nofield=true
|
|
|
- for key, val in pairs(result) do
|
|
|
- if key==field then
|
|
|
- nofield=false
|
|
|
- table.insert(val, object)
|
|
|
- end
|
|
|
- end
|
|
|
- if nofield then
|
|
|
- result[field]={object}
|
|
|
-- Evaluate a JSON-like string as a Lua expression and return its value(s).
-- The text is first rewritten by common.clearJson, prefixed with "return " and compiled.
-- @param script  text to evaluate
-- @return whatever the compiled chunk returns, or nil when script is nil or does not compile
-- SECURITY(review): this executes the text as Lua code. If `script` comes from a crawled
-- page it can run arbitrary code in this interpreter; prefer a real JSON decoder for
-- untrusted input.
function common.eval(script)
  if script == nil then
    return nil
  end
  -- loadstring exists in Lua 5.1 only; 5.2+ offers load() for string chunks.
  local compile = loadstring or load
  local chunk = compile("return " .. common.clearJson(script))
  if chunk == nil then
    return nil
  end
  return chunk()
end
|
|
|
|
|
|
-- Print the dump() rendering of any value to standard output.
function printf(obj)
  return print(dump(obj))
end
|
|
|
+
|
|
|
function dump(obj)
|
|
|
local getIndent, quoteStr, wrapKey, wrapVal, isArray, dumpObj
|
|
|
getIndent = function(level)
|
|
@@ -99,6 +90,553 @@ function dump(obj)
|
|
|
return table.concat(tokens, "\n")
|
|
|
end
|
|
|
return dumpObj(obj, 0)
|
|
|
-end
|
|
|
+end
|
|
|
+
|
|
|
-- Rewrite JSON text into Lua table-constructor syntax (used by common.eval).
--   * every "[" / "]" becomes "{" / "}"
--   * every "key": becomes ["key"]=
-- Keys are emitted in bracket form so that keys which are not valid Lua identifiers
-- (hyphens, spaces, leading digits, reserved words such as "end") still compile.
-- This is a purely textual rewrite: brackets or `":` sequences inside string values
-- are rewritten too.
-- @param json  JSON text
-- @return the rewritten text
function common.clearJson(json)
  -- square brackets -> braces
  json = string.gsub(json, "%[", "{")
  json = string.gsub(json, "%]", "}")
  -- quoted key followed by a colon -> bracketed key followed by "="
  json = string.gsub(json, "\"([^\"]*)\":", "[\"%1\"]=")
  return json
end
|
|
|
-- Replace HTML character entities with the characters they stand for.
-- NOTE(review): the entity names were lost in the copy of this file that was reviewed
-- (the patterns appeared already decoded, leaving invalid Lua). They are restored here
-- from the replacement values: "<", ">", "'", "&" and '"'. Which entity mapped to the
-- single quote and which to the double quote is a best guess - confirm against the
-- repository.
-- "&amp;" is decoded last so text such as "&amp;lt;" becomes "&lt;" and is not decoded twice.
-- @param c  text containing entities
-- @return the decoded text
function common.replaceEscString(c)
  c = string.gsub(c, "&lt;", "<")
  c = string.gsub(c, "&gt;", ">")
  c = string.gsub(c, "&quot;", "'")
  c = string.gsub(c, "&#34;", "\"")
  c = string.gsub(c, "&amp;", "&")
  return c
end
|
|
|
+
|
|
|
-- Current local date and time formatted as "YYYY-MM-DD HH:MM:SS".
function common.nowDate()
  local stamp = os.time()
  return os.date("%Y-%m-%d %H:%M:%S", stamp)
end
|
|
|
+--返回通用日期格式
|
|
|
+
|
|
|
-- English month abbreviations (including the long forms "June", "July", "Sept")
-- mapped to two-digit month numbers.
monthmap = {
  ["Jan"]="01", ["Feb"]="02", ["Mar"]="03", ["Apr"]="04",
  ["May"]="05", ["June"]="06", ["Jun"]="06", ["July"]="07",
  ["Jul"]="07", ["Aug"]="08", ["Sept"]="09", ["Sep"]="09",
  ["Oct"]="10", ["Nov"]="11", ["Dec"]="12"
}

-- Convert a space-separated timestamp whose 2nd, 3rd, 4th and 6th fields are month
-- name, day, clock time and year (e.g. "Thu Apr 07 10:20:30 CST 2016") into
-- "YYYY-MM-DD HH:MM:SS". No time-zone conversion is performed.
function common.timeStrByCST(strtime)
  local fields = common.split(strtime, " ")
  local year = fields[6]
  local month = monthmap[fields[2]]
  local day = fields[3]
  local clock = fields[4]
  return year .. "-" .. month .. "-" .. day .. " " .. clock
end
|
|
|
+
|
|
|
+
|
|
|
-- Parse a date string into "YYYY-MM-DD HH:MM:SS".
-- Every run of digits in datestr is taken as one component, so any separators work
-- ("2016年05月12日", "2016/5/12", ...).
-- @param datestr   text containing the date
-- @param datetype  which components to expect, in order:
--                    "yyyyMMdd"        year month day            (current clock time appended)
--                    "yyyyMMddHHmmss"  year month day hour min sec
--                    "yyyyMMddHHmm"    year month day hour min   (seconds set to 00)
--                    "MMdd"            month day                 (current year and clock time)
-- @return the formatted date, or the string "0" when datestr is nil, the type is
--         unknown, or datestr has fewer digit groups than the type needs
function common.parseDate(datestr,datetype)
  if datestr == nil then
    return "0"
  end

  -- Collect digit groups 1-based so that #parts is the real count.
  local parts = {}
  for digits in string.gmatch(datestr, "(%d+)") do
    parts[#parts + 1] = digits
  end

  local pad = common.padDigital
  local count = #parts

  if datetype == "yyyyMMdd" and count >= 3 then
    return parts[1] .. "-" .. pad(parts[2]) .. "-" .. pad(parts[3])
      .. os.date(" %H:%M:%S", os.time())
  elseif datetype == "yyyyMMddHHmmss" and count >= 6 then
    return parts[1] .. "-" .. pad(parts[2]) .. "-" .. pad(parts[3])
      .. " " .. pad(parts[4]) .. ":" .. pad(parts[5]) .. ":" .. pad(parts[6])
  elseif datetype == "yyyyMMddHHmm" and count >= 5 then
    return parts[1] .. "-" .. pad(parts[2]) .. "-" .. pad(parts[3])
      .. " " .. pad(parts[4]) .. ":" .. pad(parts[5]) .. ":00"
  elseif datetype == "MMdd" and count >= 2 then
    return tostring(os.date("%Y", os.time())) .. "-" .. pad(parts[1]) .. "-" .. pad(parts[2])
      .. os.date(" %H:%M:%S", os.time())
  end

  return "0"
end
|
|
|
+
|
|
|
-- Left-pad a date/time component with "0" when it is shorter than two characters;
-- longer values are returned unchanged.
function common.padDigital(src)
  if string.len(src) >= 2 then
    return src
  end
  return "0" .. src
end
|
|
|
+--local datestr="2016年05月12日22:05:04"
|
|
|
+--print(parseDate(datestr,"yyyyMMddHHmm"))
|
|
|
+--print(parseDate("4月5日","MMdd"))
|
|
|
+
|
|
|
-- Convert a "yyyy-MM-dd HH:mm:ss" string into an os.time() timestamp.
-- Fields are read from fixed character positions, so the separator characters
-- themselves are not checked.
-- @param str  date string, at least 19 characters long
-- @return the timestamp, or 0 when str is nil, shorter than 19 characters, or any
--         field is not numeric
function common.strToTimestamp(str)
  if str == nil or string.len(str) < 19 then
    return 0
  end

  local year   = tonumber(string.sub(str, 1, 4))
  local month  = tonumber(string.sub(str, 6, 7))
  local day    = tonumber(string.sub(str, 9, 10))
  local hour   = tonumber(string.sub(str, 12, 13))
  local minute = tonumber(string.sub(str, 15, 16))
  local second = tonumber(string.sub(str, 18, 19))

  -- os.time raises on a missing field; report unparsable input with the same 0
  -- sentinel used for too-short strings instead.
  if year == nil or month == nil or day == nil
      or hour == nil or minute == nil or second == nil then
    return 0
  end

  return os.time{year=year, month=month, day=day, hour=hour, min=minute, sec=second}
end
|
|
|
+
|
|
|
-- Remove every carriage return, newline, space, tab and "|" character from s.
-- Despite the name this strips them everywhere, not only at the ends, and the "|"
-- is part of the character class so pipes are removed too.
-- NOTE(review): the "|" was presumably meant as alternation; it is kept because
-- callers may rely on the current output - confirm before changing.
-- @param s  input text
-- @return the stripped text (a single value; gsub's replacement count is not leaked)
function common.trim(s)
  local stripped = string.gsub(s, "[\r|\n| |\t]+", "")
  return stripped
end
|
|
|
+
|
|
|
-- Split str on delimiter and return the pieces as an array.
-- The delimiter is used as a Lua pattern, not as plain text; it is also appended
-- literally to str before matching so that the final piece is captured.
-- Returns nil when str is nil or empty, or when delimiter is nil.
function common.split(str, delimiter)
  if delimiter == nil or str == nil or str == '' then
    return nil
  end

  local pieces = {}
  local nextPiece = string.gmatch(str .. delimiter, "(.-)" .. delimiter)
  local piece = nextPiece()
  while piece ~= nil do
    pieces[#pieces + 1] = piece
    piece = nextPiece()
  end
  return pieces
end
|
|
|
+
|
|
|
-- string.match wrapper that never returns nil: yields the first capture (or the
-- whole match when the pattern has no captures), and "" when nothing matches.
function common.regTab(con,reg)
  local hit = string.match(con, reg)
  if hit ~= nil then
    return hit
  end
  return ""
end
|
|
|
+
|
|
|
-- Check that every field named in tab1 has a non-nil, non-empty value in tab2.
-- @param tab1  table whose values are the field names to check
-- @param tab2  object being validated
-- @return ok   true when nothing is missing
-- @return msg  one "<field>:值空," entry per missing field, concatenated ("" when ok)
function common.checkData(tab1,tab2)
  local missing = {}
  for _, field in pairs(tab1) do
    local value = tab2[field]
    if value == nil or value == "" then
      missing[#missing + 1] = field .. ":值空" .. ","
    end
  end
  return #missing == 0, table.concat(missing)
end
|
|
|
+
|
|
|
-- URL-decode: replace every %XX escape with the byte it encodes.
-- "+" characters are left as they are.
function common.decodeURI(s)
  local function byteFromHex(hex)
    return string.char(tonumber(hex, 16))
  end
  local decoded = string.gsub(s, '%%(%x%x)', byteFromHex)
  return decoded
end
|
|
|
+
|
|
|
-- URL-encode s: every byte other than letters, digits, "." and "-" becomes %XX
-- (upper-case hex) and every space becomes "+".
-- @param s  text to encode
-- @return the encoded text (a single value; gsub's replacement count is not leaked)
function common.encodeURI(s)
  local escaped = string.gsub(s, "([^%w%.%- ])", function(c)
    return string.format("%%%02X", string.byte(c))
  end)
  local encoded = string.gsub(escaped, " ", "+")
  return encoded
end
|
|
|
+
|
|
|
+
|
|
|
-- Resolve a link found on a page into an absolute URL.
-- @param channel  absolute URL of the page the link was found on; its scheme
--                 (http or https) is reused for the result
-- @param href     the link: absolute, protocol-relative ("//host/x"),
--                 root-relative ("/x") or relative ("x", "./x", "../x")
-- @return the absolute URL
-- Behaviour:
--   * a link that already starts with "http" (any case) is returned unchanged
--   * "//host/path" gets the channel's scheme
--   * "/path" is resolved against the channel's host
--   * anything else is resolved against the channel's directory (the channel
--     with its last path segment removed); each leading "../" climbs one
--     directory but never above the host
-- Only leading "./" and "../" segments are resolved; ones that appear later in
-- the link are kept as written.
function common.gethref(channel,href)
  local scheme = "http://"
  if string.lower(string.sub(channel, 1, 5)) == "https" then
    scheme = "https://"
  end

  if string.lower(string.sub(href, 1, 4)) == "http" then
    return href
  end

  -- Protocol-relative link: only the scheme name is missing.
  if string.sub(href, 1, 2) == "//" then
    return string.sub(scheme, 1, -3) .. href
  end

  -- Strip the scheme (any letter case); keep the channel as-is when it has none.
  local base = string.match(channel, "^[Hh][Tt][Tt][Pp][Ss]?://(.*)$") or channel
  -- segments[1] is the host, the rest are path segments.
  local segments = common.split(base, "/") or {}

  if string.sub(href, 1, 1) == "/" then
    return scheme .. (segments[1] or "") .. href
  end

  -- Relative link: start from the channel's directory. table.remove(t) pops the
  -- last element on every Lua version (an index of -1 does not), and the host
  -- itself is never popped.
  if #segments > 1 then
    table.remove(segments)
  end
  while true do
    if string.sub(href, 1, 2) == "./" then
      href = string.sub(href, 3)
    elseif string.sub(href, 1, 3) == "../" then
      href = string.sub(href, 4)
      if #segments > 1 then
        table.remove(segments)
      end
    else
      break
    end
  end

  local prefix = scheme
  for i = 1, #segments do
    prefix = prefix .. segments[i] .. "/"
  end
  return prefix .. href
end
|
|
|
+
|
|
|
-- Split str on delimiter (a Lua pattern) and return the pieces as an array.
-- Same behaviour as common.split: nil is returned when str is nil or empty or
-- when delimiter is nil.
function common.splitf(str, delimiter)
  local unusable = (str == nil) or (str == '') or (delimiter == nil)
  if unusable then
    return nil
  end

  local parts = {}
  local padded = str .. delimiter
  for part in string.gmatch(padded, "(.-)" .. delimiter) do
    parts[#parts + 1] = part
  end
  return parts
end
|
|
|
+
|
|
|
+
|
|
|
-- Check page content against a list of "selector==expected" rules.
-- @param content  page content handed to findMap
-- @param update   rules separated by "\n"; each rule is "<selector>==<expected value>".
--                 Lines without "==" and blank lines are ignored.
-- @return 0 when update is nil/empty or every rule matches;
--         -1 when at least one rule's looked-up value differs from the expected one
function common.checkUpdate(content,update)
  if update == nil or update == "" then
    return 0
  end

  local mismatch = false
  for _, rule in pairs(common.splitf(update, "\n")) do
    -- splitf returns nil for an empty line (e.g. after a trailing newline).
    local pair = common.splitf(rule, "==")
    if pair ~= nil and #pair > 1 then
      local found = findMap({ ["tmp"] = pair[1] }, content)["tmp"]
      if found ~= pair[2] then
        mismatch = true
      end
    end
  end

  if mismatch then
    return -1
  end
  return 0
end
|
|
|
+
|
|
|
-- Collect the link text of every <a> in content whose href ends in a known
-- attachment extension.
-- @param href     unused here; kept so the signature matches the other getEnclosure* helpers
-- @param content  HTML handed to findListHtml / findOneText
-- @return table keyed by the link's position in the "a" list (so it can be sparse),
--         value = the link text
function common.getEnclosureTitle(href,content)
  -- Extensions are matched case-sensitively at the very end of the href.
  local attachmentExts = { "jpg", "JPG", "bid", "pdf", "doc", "docx", "xls", "xlsx", "zip", "rar" }
  local fileTitles = {}

  local linkList = findListHtml("a", content)
  for k in pairs(linkList) do
    local anchor = "a:eq(" .. tostring(k - 1) .. ")"
    -- Look the href up once per link instead of once per extension.
    -- NOTE(review): assumes findOneText returns the same value for the same selector - confirm.
    local target = findOneText(anchor .. ":attr(href)", content)
    for _, ext in ipairs(attachmentExts) do
      if string.find(target, "%." .. ext .. "$") ~= nil then
        fileTitles[k] = findOneText(anchor, content)
        break
      end
    end
  end

  return fileTitles
end
|
|
|
+
|
|
|
-- Collect the absolute URL of every <a> in content whose list entry (as returned
-- by findListHtml) ends in a known attachment extension.
-- @param href     URL of the page; relative links are resolved against it
-- @param content  HTML handed to findListHtml / findOneText
-- @return table keyed by the link's position in the "a" list (so it can be sparse),
--         value = absolute URL with backslashes turned into "/"
function common.getEnclosureHref(href,content)
  -- Extensions are matched case-sensitively at the very end of the entry.
  local attachmentExts = { "jpg", "JPG", "bid", "pdf", "doc", "docx", "xls", "xlsx", "zip", "rar" }
  local hrefs = {}

  local linkList = findListHtml("a", content)
  for k, v in pairs(linkList) do
    local isAttachment = false
    for _, ext in ipairs(attachmentExts) do
      if string.find(v, "%." .. ext .. "$") ~= nil then
        isAttachment = true
        break
      end
    end

    if isAttachment then
      local target = findOneText("a:eq(" .. tostring(k - 1) .. "):attr(href)", content)
      -- common.gethref returns links that start with "http" unchanged, so it is
      -- always called; testing for "http" anywhere in the link left relative
      -- links that merely contain "http" unresolved.
      target = common.gethref(href, target)
      target = string.gsub(target, "\\", "/")
      hrefs[k] = target
    end
  end
  return hrefs
end
|
|
|
+
|
|
|
-- Collect the absolute URL of every <a> in content whose href attribute ends in a
-- known attachment extension.
-- @param href     URL of the page; relative links are resolved against it
-- @param content  HTML handed to findListHtml / findOneText
-- @return table keyed by the link's position in the "a" list (so it can be sparse),
--         value = absolute URL with backslashes turned into "/"
function common.getEnclosureHrefByList(href,content)
  -- Extensions are matched case-sensitively at the very end of the href.
  local attachmentExts = { "jpg", "JPG", "bid", "pdf", "doc", "docx", "xls", "xlsx", "zip", "rar" }
  local hrefs = {}

  local linkList = findListHtml("a", content)
  for k in pairs(linkList) do
    -- Look the href up once per link instead of once per extension.
    -- NOTE(review): assumes findOneText returns the same value for the same selector - confirm.
    local target = findOneText("a:eq(" .. tostring(k - 1) .. "):attr(href)", content)
    for _, ext in ipairs(attachmentExts) do
      if string.find(target, "%." .. ext .. "$") ~= nil then
        -- common.gethref returns links that start with "http" unchanged, so it
        -- is always called; testing for "http" anywhere in the link left relative
        -- links that merely contain "http" unresolved.
        local resolved = common.gethref(href, target)
        resolved = string.gsub(resolved, "\\", "/")
        hrefs[k] = resolved
        break
      end
    end
  end
  return hrefs
end
|
|
|
+
|
|
|
+
|
|
|
-- Download several attachments.
-- For every entry of fileLinkArray, downloadFile is called with the name stored
-- under the same key in fileNameArray. While the returned url is "" the download is
-- retried, up to five more times; if the last retry still yields "" the link is
-- reported through saveErrLog.
-- @return array of { url, filename, size, ftype, fid } tables, one per download that
--         produced a non-empty url and name
function common.getFileAttachmentsArray(fileNameArray,fileLinkArray)
  local attachments = {}
  for key, link in pairs(fileLinkArray) do
    local wanted = fileNameArray[key]
    local url, name, size, ftype, fid = downloadFile(wanted, link, "get", {}, {}, "")

    local retries = 0
    while url == "" and retries < 5 do
      url, name, size, ftype, fid = downloadFile(wanted, link, "get", {}, {}, "")
      retries = retries + 1
    end
    if retries == 5 and url == "" then
      saveErrLog(link, "comm附件下载失败")
    end

    local haveUrl = (url ~= nil and url ~= "")
    local haveName = (name ~= nil and name ~= "")
    if haveUrl and haveName then
      attachments[#attachments + 1] = {
        ["url"] = url,
        ["filename"] = name,
        ["size"] = size,
        ["ftype"] = ftype,
        ["fid"] = fid
      }
    end
  end
  return attachments
end
|
|
|
+
|
|
|
-- Find the attachment links in a page and download them in one step.
-- Titles come from common.getEnclosureTitle and links from
-- common.getEnclosureHrefByList, falling back to common.getEnclosureHref when the
-- former finds nothing. Links whose title is "" are dropped before downloading.
-- @param href     URL of the page
-- @param content  HTML of the page
-- @return whatever common.getFileAttachmentsArray returns for the remaining links
function common.getFileAttachmentsArrayByHrefAndContent(href,content)
  local fileTitles = common.getEnclosureTitle(href, content)
  local fileLinks = common.getEnclosureHrefByList(href, content)

  -- Both tables are keyed by link position and can be sparse, so the length
  -- operator / ipairs cannot be used on them; next() tests for "no entries".
  if next(fileLinks) == nil then
    fileLinks = common.getEnclosureHref(href, content)
  end

  -- Clearing existing keys while traversing with pairs is allowed; clearing
  -- (instead of table.remove) keeps titles and links paired by key.
  for index, title in pairs(fileTitles) do
    if title == "" then
      fileTitles[index] = nil
      fileLinks[index] = nil
    end
  end

  local attachments = common.getFileAttachmentsArray(fileTitles, fileLinks)
  return attachments
end
|
|
|
+
|
|
|
+
|
|
|
-- Attachment extensions recognised by common.getFilesLinkByTag.
filetype = {
  "jpg", "JPG", "bid", "pdf", "PDF", "doc", "docx", "xls", "xlsx", "zip",
  "rar", "swf", "DOCX", "DOC", "PDF", "XLS", "XLSX", "ZIP", "RAR", "SWF"
}

-- Collect the title and absolute href of every attachment link inside one part
-- of a page.
-- @param href     URL of the page; links are resolved against it with common.gethref
-- @param tags     selector of the part of the page to search
-- @param content  HTML of the page
-- @param withend  truthy: ".<ext>" must be at the very end of the href or title;
--                 falsy: ".<ext>" may appear anywhere in them
-- @return array of { href = ..., title = ... } tables; links with an empty title
--         are left out
function common.getFilesLinkByTag(href,tags,content,withend)
  local section = findOneHtml(tags, content)
  local anchors = findListHtml(tags .. " a", content)
  local anchorTail = ""
  if withend then
    anchorTail = "$"
  end

  local found = {}
  for k in pairs(anchors) do
    local selector = "a:eq(" .. tostring(k - 1) .. ")"
    local item = findMap({ ["href"] = selector .. ":attr(href)", ["title"] = selector }, section)

    item["title"] = common.trim(tostring(item["title"]))
    item["href"] = common.gethref(href, tostring(item["href"]))
    item["href"] = string.gsub(item["href"], "\\", "/")
    if string.find(item["href"], "http") == nil then
      item["href"] = transCode("utf8", item["href"])
    end

    -- Position of the first extension found in the href, else in the title.
    local hit
    for _, ext in pairs(filetype) do
      local pattern = "%." .. ext .. anchorTail
      hit = string.find(item["href"], pattern)
      if hit == nil then
        hit = string.find(item["title"], pattern)
      end
      if hit then
        break
      end
    end

    if hit ~= nil and item["title"] ~= "" then
      found[#found + 1] = item
    end
  end
  return found
end
|
|
|
+
|
|
|
-- Download every attachment linked inside one part of a page.
-- Links come from common.getFilesLinkByTag(href, tags, content, withend). When the
-- first download returns "" as url, it is retried up to six times until both url and
-- size are non-empty; if the sixth retry still fails the link is reported through
-- saveErrLog.
-- @return array with one entry per link:
--           success: { url, filename, size, ftype, fid, org_url }
--           failure: { filename, org_url }
--         org_url is always the link as found on the page
function common.getFileAttachmentsArrayWithTag(href,tags,content,withend)
  local attachments = {}
  local links = common.getFilesLinkByTag(href, tags, content, withend)

  for _, link in ipairs(links) do
    local title, source = link["title"], link["href"]
    local url, name, size, ftype, fid = downloadFile(title, source, "get", {}, {}, "")

    if url == "" then
      for attempt = 1, 6 do
        url, name, size, ftype, fid = downloadFile(title, source, "get", {}, {}, "")
        if url ~= "" and size ~= "" then
          break -- download succeeded
        end
        if attempt == 6 then
          saveErrLog(source, "comm附件下载失败")
        end
      end
    end

    -- Nothing came back at all: fall back to the link title as file name.
    if url == "" and size == "" then
      name = title
    end
    if type(url) ~= "string" then
      url = ""
    end

    local record
    if url ~= "" and name ~= nil and name ~= "" and size ~= "" then
      record = {
        ["url"] = url,
        ["filename"] = name,
        ["size"] = size,
        ["ftype"] = ftype,
        ["fid"] = fid,
        ["org_url"] = source
      }
    else
      record = {
        ["filename"] = name,
        ["org_url"] = source
      }
    end
    attachments[#attachments + 1] = record
  end
  return attachments
end
|
|
|
+
|
|
|
+
|
|
|
-- Remove every HTML comment ("<!-- ... -->") from content.
-- The closing "-->" is searched for after the opening "<!--", so a stray "-->"
-- earlier in the text is left alone. An opening marker with no closing marker is
-- kept, together with everything after it.
-- @param content  HTML text
-- @return the text without comments
function common.getPureContent(content)
  local kept = {}
  local pos = 1
  while true do
    local open = string.find(content, "<!--", pos, true)
    if open == nil then
      break
    end
    -- Start two characters in so that the shortest comment, "<!-->", still closes.
    local _, close = string.find(content, "-->", open + 2, true)
    if close == nil then
      break
    end
    kept[#kept + 1] = string.sub(content, pos, open - 1)
    pos = close + 1
  end
  kept[#kept + 1] = string.sub(content, pos)
  return table.concat(kept)
end
|
|
|
+
|
|
|
+
|
|
|
-- Extract an amount of money and its currency from free text.
-- @param orgStr  text such as "人民币1,234.5万元" or "500美元"
-- @return amount    the first number found, multiplied by 10000 when the text
--                   contains "万", rounded to two decimals; 0 when the text
--                   holds no number
-- @return currency  "美元" when the text names it and does not name "人民币",
--                   otherwise "人民币"
-- NOTE(review): the symbol patterns in the reviewed copy of this file were ASCII
-- "(", ")" and "$", which are magic characters in Lua patterns ("(" raises
-- "unfinished capture", "$" only anchors). They are escaped here, and the
-- full-width forms are stripped too, written as byte escapes. Full-width forms
-- were presumably what the original stripped (the yen replacement appeared twice)
-- - confirm against the repository.
function common.getMoneyAndType(orgStr)
  local text = common.trim(orgStr)

  -- Brackets and thousands separators, ASCII then full-width (U+FF08, U+FF09, U+FF0C).
  local noise = { "%(", "%)", ",", "\239\188\136", "\239\188\137", "\239\188\140" }
  for _, pattern in ipairs(noise) do
    text = string.gsub(text, pattern, "")
  end

  local multiplier = 1
  if string.find(text, "万") ~= nil then
    multiplier = 10000
    text = string.gsub(text, "万元", "")
    text = string.gsub(text, "万", "")
  end

  local moneyType = "人民币"
  if string.find(text, "人民币") ~= nil then
    text = string.gsub(text, "人民币", "")
    text = string.gsub(text, "\194\165", "")      -- yen sign U+00A5
    text = string.gsub(text, "\239\191\165", "")  -- full-width yen sign U+FFE5
  elseif string.find(text, "美元") ~= nil then
    text = string.gsub(text, "美元", "")
    text = string.gsub(text, "%$", "")
    text = string.gsub(text, "\239\188\132", "")  -- full-width dollar sign U+FF04
    moneyType = "美元"
  end

  -- First run of digits with an optional fractional part.
  local digits = string.match(text, "%d+%.?%d*")
  local num = digits and tonumber(digits)
  if num == nil then
    return 0, moneyType
  end

  local resNum = tonumber(string.format("%.2f", num * multiplier))
  return resNum, moneyType
end
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
--通用方法结束
|
|
|
return common;
|