comm.lua 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751
  1. --[[
  2. 企明星爬虫系统,公共文件
  3. Author:a7
  4. Date:2016/4/7
  5. ]]
  6. local json=require "json"
  7. common={}
  -- Evaluate a JSON-like string as a Lua expression and return its value.
  -- The text is first rewritten by common.clearJson (brackets and quoted
  -- keys become Lua table-constructor syntax) and then compiled as
  -- "return <text>" with loadstring.
  --
  -- SECURITY NOTE(review): the compiled chunk runs with access to every
  -- global, so any text passed here is executed as code. Only call this on
  -- data from sources you trust, or sandbox the chunk before running it.
  --
  -- script: JSON-like text.
  -- Returns the evaluated value, or nil when script is nil or the rewritten
  -- text does not compile.
  function common.eval(script)
    if script == nil then
      return nil
    end
    local chunk = loadstring("return " .. common.clearJson(script))
    if chunk == nil then
      return nil
    end
    return chunk()
  end
  -- Print the textual rendering of a value (as produced by the global
  -- dump function) to standard output.
  -- obj: any value.
  function printf(obj)
    local rendered = dump(obj)
    print(rendered)
  end
  -- Render a value as Lua-like source text.
  --
  -- Tables are written one entry per line, indented with tabs. A table whose
  -- keys are exactly 1..n is written as a list; any other table is written
  -- as "[key] = value" pairs in pairs() order. Strings are double-quoted
  -- with tab, CR, LF, quote and backslash escaped. Numbers are returned
  -- unchanged, every other value goes through tostring.
  --
  -- A table that contains itself (directly or indirectly) is rendered as
  -- <circular reference> at the point where the cycle closes instead of
  -- recursing forever.
  --
  -- obj: any value.
  -- Returns a string for tables and strings; for a non-table obj the value
  -- is returned as wrapVal produces it (a number stays a number).
  function dump(obj)
    -- Forward declaration: wrapVal and dumpObj call each other.
    local dumpObj

    -- Tables that are being rendered on the current recursion path.
    local inProgress = {}

    local escapes = {
      ["\t"] = "\\t",
      ["\r"] = "\\r",
      ["\n"] = "\\n",
      ["\""] = "\\\"",
      ["\\"] = "\\\\",
    }

    local function getIndent(level)
      return string.rep("\t", level)
    end

    local function quoteStr(str)
      -- Control characters without an entry in the table are left as they are.
      str = string.gsub(str, "[%c\\\"]", escapes)
      return '"' .. str .. '"'
    end

    local function wrapKey(val)
      if type(val) == "number" then
        return "[" .. val .. "]"
      elseif type(val) == "string" then
        return "[" .. quoteStr(val) .. "]"
      end
      return "[" .. tostring(val) .. "]"
    end

    local function wrapVal(val, level)
      if type(val) == "table" then
        return dumpObj(val, level)
      elseif type(val) == "number" then
        return val
      elseif type(val) == "string" then
        return quoteStr(val)
      end
      return tostring(val)
    end

    -- Returns true plus the element count when the keys are exactly 1..count.
    local function isArray(arr)
      local count = 0
      for _ in pairs(arr) do
        count = count + 1
      end
      for i = 1, count do
        if arr[i] == nil then
          return false
        end
      end
      return true, count
    end

    dumpObj = function(value, level)
      if type(value) ~= "table" then
        return wrapVal(value)
      end
      if inProgress[value] then
        return "<circular reference>"
      end
      inProgress[value] = true

      level = level + 1
      local tokens = { "{" }
      local listLike, count = isArray(value)
      if listLike then
        for i = 1, count do
          tokens[#tokens + 1] = getIndent(level) .. wrapVal(value[i], level) .. ","
        end
      else
        for k, v in pairs(value) do
          tokens[#tokens + 1] = getIndent(level) .. wrapKey(k) .. " = " .. wrapVal(v, level) .. ","
        end
      end
      tokens[#tokens + 1] = getIndent(level - 1) .. "}"

      -- Only the active path is tracked, so a table that is merely shared
      -- (referenced twice without a cycle) is still rendered in full.
      inProgress[value] = nil
      return table.concat(tokens, "\n")
    end

    return dumpObj(obj, 0)
  end
  -- Rewrite JSON text into Lua table-constructor syntax using plain
  -- substitutions (this is not a JSON parser):
  --   * every "[" becomes "{" and every "]" becomes "}"
  --   * every quoted key followed by ":" becomes ["key"]=
  -- Keys are emitted in bracketed string form so that keys which are not
  -- valid Lua identifiers (for example "a-b", "end" or "1") still compile.
  -- Square brackets inside string values are rewritten as well.
  --
  -- json: JSON text; nil yields "".
  -- Returns the rewritten text.
  function common.clearJson(json)
    if json == nil then
      return ""
    end
    -- Brackets first, so the brackets added around keys below stay intact.
    local text = string.gsub(json, "%[", "{")
    text = string.gsub(text, "%]", "}")
    text = string.gsub(text, "\"([^\"]*)\":", "[\"%1\"]=")
    return text
  end
  -- Decode a small set of HTML entities in a string.
  --   &lt; -> <    &gt; -> >    &quot; -> '    &#34; -> "    &amp; -> &
  -- Note that &quot; is mapped to a single quote while &#34; becomes a
  -- double quote.
  -- &amp; is decoded last so that text such as "&amp;#34;" yields the
  -- literal "&#34;" instead of being decoded a second time.
  --
  -- c: text to decode.
  -- Returns the decoded text.
  function common.replaceEscString(c)
    c = string.gsub(c, "&lt;", "<")
    c = string.gsub(c, "&gt;", ">")
    c = string.gsub(c, "&quot;", "'")
    c = string.gsub(c, "&#34;", "\"")
    c = string.gsub(c, "&amp;", "&")
    return c
  end
  -- Return the current local date and time formatted as
  -- "YYYY-MM-DD HH:MM:SS".
  function common.nowDate()
    local now = os.time()
    return os.date("%Y-%m-%d %H:%M:%S", now)
  end
  -- English month names (short and long abbreviations) mapped to two-digit
  -- month numbers.
  monthmap={["Jan"]="01",["Feb"]="02",["Mar"]="03",["Apr"]="04",["May"]="05",["June"]="06",["Jun"]="06",["July"]="07",["Jul"]="07",["Aug"]="08",["Sept"]="09",["Sep"]="09",["Oct"]="10",["Nov"]="11",["Dec"]="12"}

  -- Convert a whitespace-separated timestamp whose fields are
  --   1: (ignored)  2: month name  3: day  4: HH:MM:SS  5: (ignored)  6: year
  -- e.g. "Thu Apr 07 12:00:00 CST 2016", into "YYYY-MM-DD HH:MM:SS".
  --
  -- Fields are split on runs of whitespace, so a doubled space before a
  -- single-digit day does not shift the field positions. A single-digit day
  -- is zero-padded to keep the result fixed-width.
  --
  -- strtime: the timestamp text.
  -- Returns the reformatted string, or "0" when strtime is nil, has fewer
  -- than six fields, or its month name is not in monthmap.
  function common.timeStrByCST(strtime)
    if strtime == nil then
      return "0"
    end
    local fields = {}
    for field in string.gmatch(strtime, "%S+") do
      fields[#fields + 1] = field
    end
    local year, day, clock = fields[6], fields[3], fields[4]
    local month = fields[2] and monthmap[fields[2]]
    if year == nil or month == nil or day == nil or clock == nil then
      return "0"
    end
    if string.len(day) < 2 then
      day = "0" .. day
    end
    return year .. "-" .. month .. "-" .. day .. " " .. clock
  end
  -- Build a "YYYY-MM-DD HH:MM:SS" string from the digit groups found in a
  -- date string. Any non-digit characters act as separators, so
  -- "2016年05月12日22:05:04" and "2016-05-12 22:05:04" parse the same way.
  --
  -- datetype selects how the digit groups are interpreted:
  --   "MMdd"           month, day; the current year and 00:00:00 are used
  --   "yyyyMMdd"       year, month, day; the current wall-clock time is appended
  --   "yyyyMMddHHmm"   year, month, day, hour, minute; seconds are "00"
  --   "yyyyMMddHHmmss" year, month, day, hour, minute, second
  -- Month, day, hour, minute and second are zero-padded to two digits so the
  -- result is always fixed-width.
  --
  -- datestr:  text containing the date.
  -- datetype: one of the format names above.
  -- Returns the formatted date, or "0" when datestr is nil, holds fewer
  -- digit groups than the format needs, or datetype is not recognised.
  function common.parseDate(datestr,datetype)
    if datestr == nil then
      return "0"
    end

    local parts = {}
    for digits in string.gmatch(datestr, "(%d+)") do
      parts[#parts + 1] = digits
    end
    -- Every supported format needs at least two digit groups.
    if #parts < 2 then
      return "0"
    end

    local pad = common.padDigital

    if datetype == "MMdd" then
      return tostring(os.date("%Y", os.time())) .. "-" .. pad(parts[1]) .. "-" .. pad(parts[2]) .. " 00:00:00"
    end

    if #parts < 3 then
      return "0"
    end
    local day = parts[1] .. "-" .. pad(parts[2]) .. "-" .. pad(parts[3])

    if datetype == "yyyyMMdd" then
      return day .. os.date(" %H:%M:%S", os.time())
    end
    if datetype == "yyyyMMddHHmm" and #parts >= 5 then
      return day .. " " .. pad(parts[4]) .. ":" .. pad(parts[5]) .. ":00"
    end
    if datetype == "yyyyMMddHHmmss" and #parts >= 6 then
      return day .. " " .. pad(parts[4]) .. ":" .. pad(parts[5]) .. ":" .. pad(parts[6])
    end
    return "0"
  end
  -- Left-pad a date component with a single "0" when it is shorter than
  -- two characters; longer values are returned unchanged.
  -- src: the component (string, or a number that string.len can coerce).
  function common.padDigital(src)
    if string.len(src) >= 2 then
      return src
    end
    return "0" .. src
  end
  185. --local datestr="2016年05月12日22:05:04"
  186. --print(parseDate(datestr,"yyyyMMddHHmm"))
  187. --print(parseDate("4月5日","MMdd"))
  -- Convert a date string in the fixed-width layout "yyyy-MM-dd HH:mm:ss"
  -- to a Unix timestamp via os.time (local time zone).
  -- The fields are read by character position (1-4, 6-7, 9-10, 12-13,
  -- 15-16, 18-19); the separator characters themselves are not checked.
  --
  -- str: the date string.
  -- Returns the timestamp, or 0 when str is nil, shorter than 19
  -- characters, or any of the six fields is not numeric.
  function common.strToTimestamp(str)
    if str == nil or string.len(str) < 19 then
      return 0
    end
    local Y = tonumber(string.sub(str, 1, 4))
    local M = tonumber(string.sub(str, 6, 7))
    local D = tonumber(string.sub(str, 9, 10))
    local H = tonumber(string.sub(str, 12, 13))
    local MM = tonumber(string.sub(str, 15, 16))
    local SS = tonumber(string.sub(str, 18, 19))
    -- os.time raises an error on a missing field, so reject malformed input here.
    if not (Y and M and D and H and MM and SS) then
      return 0
    end
    return os.time{year=Y, month=M, day=D, hour=H, min=MM, sec=SS}
  end
  -- Remove every carriage return, line feed, space and tab from a string,
  -- wherever they occur (not only at the ends). Because of how the
  -- character class is written, "|" characters are removed too; that
  -- behaviour is kept so existing callers see the same text.
  --
  -- Exactly one value is returned: string.gsub also yields a replacement
  -- count, which must not leak into calls such as table.insert(t, trim(x)).
  --
  -- s: the text; nil yields "".
  -- Returns the stripped text.
  function common.trim(s)
    if s == nil then
      return ""
    end
    local stripped = string.gsub(s, "[\r|\n| |\t]+", "")
    return stripped
  end
  -- Split a string around every occurrence of a delimiter.
  -- The delimiter is spliced into a Lua pattern without escaping, so
  -- pattern magic characters in it keep their pattern meaning.
  --
  -- str:       text to split.
  -- delimiter: separator (treated as a Lua pattern).
  -- Returns a list of the pieces; an empty list when str is nil or empty
  -- or delimiter is nil.
  function common.split(str, delimiter)
    local pieces = {}
    local usable = str ~= nil and str ~= '' and delimiter ~= nil
    if usable then
      -- Append the delimiter so the final piece is terminated like the rest.
      local source = str .. delimiter
      for piece in string.gmatch(source, "(.-)" .. delimiter) do
        pieces[#pieces + 1] = piece
      end
    end
    return pieces
  end
  -- Run string.match and normalise a failed match to the empty string.
  -- con: text to search.
  -- reg: Lua pattern.
  -- Returns the first value string.match yields, or "" when there is no match.
  function common.regTab(con,reg)
    return string.match(con, reg) or ""
  end
  -- Check that the listed fields of an object are present and non-empty.
  -- tab1: table whose values are the field names to check.
  -- tab2: the object to validate.
  -- Returns two values: true when no field is nil or "" (false otherwise),
  -- and a message that lists each failing field as "<name>:值空,".
  function common.checkData(tab1,tab2)
    local problems = {}
    for _, field in pairs(tab1) do
      local value = tab2[field]
      if value == nil or value == "" then
        problems[#problems + 1] = field .. ":值空" .. ","
      end
    end
    return #problems == 0, table.concat(problems)
  end
  -- Decode percent-escapes: every "%XX" (two hex digits) is replaced by the
  -- byte with that value. "+" is left untouched.
  -- s: the encoded text; nil yields "".
  -- Returns the decoded text.
  function common.decodeURI(s)
    if s == nil then
      return ""
    end
    local function byteFromHex(hex)
      return string.char(tonumber(hex, 16))
    end
    local decoded = string.gsub(s, '%%(%x%x)', byteFromHex)
    return decoded
  end
  -- Percent-encode a string for use in a URL query.
  -- Letters, digits, "." and "-" are kept; a space becomes "+"; every other
  -- byte becomes "%XX" with upper-case hex digits.
  --
  -- Exactly one value is returned: string.gsub also yields a replacement
  -- count, which must not leak to callers that forward the result as an
  -- argument list.
  --
  -- s: the text; nil yields "".
  -- Returns the encoded text.
  function common.encodeURI(s)
    if s == nil then
      return ""
    end
    local escaped = string.gsub(s, "([^%w%.%- ])", function(c)
      return string.format("%%%02X", string.byte(c))
    end)
    local encoded = string.gsub(escaped, " ", "+")
    return encoded
  end
  -- Resolve a link found on a page against that page's URL.
  --
  --   * href starting with "http" (any case) is returned unchanged.
  --   * "../x" climbs one directory per "..", never above the host.
  --   * "./x" or a bare "x" is resolved against the page's directory.
  --   * "/x" is resolved against the host.
  -- The scheme of the result is "https://" when channel starts with
  -- "https" (any case), otherwise "http://".
  --
  -- channel: URL of the page the link was found on.
  -- href:    the link to resolve.
  -- Returns the absolute URL.
  function common.gethref(channel,href)
    if string.lower(string.sub(href, 1, 4)) == "http" then
      return href
    end

    local prehttp = "http://"
    if string.lower(string.sub(channel, 1, 5)) == "https" then
      prehttp = "https://"
    end

    -- "host/dir/file" part of the page URL; a channel without a scheme is
    -- used as it stands instead of failing.
    local hostAndPath = channel:match("https?://(.*)$") or channel
    local channelpath = common.split(hostAndPath, "/")
    local host = channelpath[1] or ""

    -- Joins path segments, each followed by "/".
    local function joinSegments(segments)
      local buf = {}
      for i = 1, #segments do
        buf[i] = segments[i] .. "/"
      end
      return table.concat(buf)
    end

    local first = string.sub(href, 1, 1)
    if first ~= "." and first ~= "/" then
      href = "./" .. href
    end

    local lead = string.sub(href, 1, 2)
    if lead == ".." then
      -- Splitting on "./" yields one "." per ".." plus the remaining path.
      -- common.split appends the delimiter text "%./" before matching, so
      -- the last piece ends with a stray "%" that is cut off below.
      local infopath = common.split(href, "%./")
      for _ = 1, #infopath do
        if #channelpath <= 1 then
          break
        end
        -- Drop the last segment. The position argument is omitted because
        -- a negative position is not portable across Lua implementations.
        table.remove(channelpath)
      end
      local infourl = infopath[#infopath]
      return prehttp .. joinSegments(channelpath) .. string.sub(infourl, 1, string.len(infourl) - 1)
    end

    if lead == "./" then
      -- Drop the page's file name, but keep the host when the channel has
      -- no path at all (e.g. "http://host").
      if #channelpath > 1 then
        table.remove(channelpath)
      end
      return prehttp .. joinSegments(channelpath) .. string.sub(href, 3)
    end

    if first == "/" then
      return prehttp .. host .. href
    end
    return prehttp .. host .. "/" .. href
  end
  -- Split a string around every occurrence of a delimiter, returning nil
  -- (rather than an empty list) when there is nothing to split.
  -- The delimiter is spliced into a Lua pattern without escaping.
  --
  -- str:       text to split.
  -- delimiter: separator (treated as a Lua pattern).
  -- Returns a list of the pieces, or nil when str is nil or empty or
  -- delimiter is nil.
  function common.splitf(str, delimiter)
    local unusable = str == nil or str == '' or delimiter == nil
    if unusable then
      return nil
    end
    local pieces = {}
    for piece in string.gmatch(str .. delimiter, "(.-)" .. delimiter) do
      pieces[#pieces + 1] = piece
    end
    return pieces
  end
  -- Verify page content against a list of expectations.
  -- update holds one rule per line in the form "<selector>==<expected>".
  -- For each rule the selector is evaluated with findMap against content
  -- and the result is compared with the expected text. Lines without "=="
  -- and blank lines (such as the one produced by a trailing newline) are
  -- ignored.
  --
  -- content: the page content handed to findMap.
  -- update:  the rule text; nil or "" means there is nothing to check.
  -- Returns 0 when every rule matches (or there are no rules), -1 when at
  -- least one rule does not match.
  function common.checkUpdate(content,update)
    if update == nil or update == "" then
      return 0
    end
    local mismatch = false
    local rules = common.splitf(update, "\n")
    for _, rule in pairs(rules) do
      -- splitf returns nil for an empty line, so guard before measuring.
      local parts = common.splitf(rule, "==")
      if parts ~= nil and #parts > 1 then
        local actual = findMap({ ["tmp"] = parts[1] }, content)["tmp"]
        if actual ~= parts[2] then
          mismatch = true
        end
      end
    end
    if mismatch then
      return -1
    end
    return 0
  end
  -- Patterns for the file-name suffixes that mark a link as an attachment.
  -- Matching is case-sensitive and anchored to the end of the text.
  local ENCLOSURE_SUFFIX_PATTERNS = {
    "%.jpg$", "%.JPG$", "%.bid$", "%.pdf$", "%.doc$",
    "%.docx$", "%.xls$", "%.xlsx$", "%.zip$", "%.rar$",
  }

  -- True when text ends with one of the attachment suffixes.
  local function hasEnclosureSuffix(text)
    for _, pattern in ipairs(ENCLOSURE_SUFFIX_PATTERNS) do
      if string.find(text, pattern) ~= nil then
        return true
      end
    end
    return false
  end

  -- Selector for the k-th link of the document (the selector index is 0-based).
  local function nthLinkSelector(k)
    return "a:eq(" .. tostring(k - 1) .. ")"
  end

  -- Make a link absolute (relative to base) and normalise backslashes to "/".
  -- Only a link that starts with "http" is treated as already absolute.
  local function absoluteEnclosureHref(base, link)
    if string.sub(link, 1, 4) ~= "http" then
      link = common.gethref(base, link)
    end
    return (string.gsub(link, "\\", "/"))
  end

  -- Collect the link text of every <a> whose href ends with an attachment
  -- suffix.
  -- href:    unused; kept so the signature matches the sibling functions.
  -- content: HTML to search.
  -- Returns a table keyed by the link's position in findListHtml("a", ...);
  -- positions of non-attachment links are absent, so the table can be sparse.
  function common.getEnclosureTitle(href,content)
    local fileTitles = {}
    for k in pairs(findListHtml("a", content)) do
      local selector = nthLinkSelector(k)
      -- Read the href once per link instead of once per suffix.
      local target = findOneText(selector .. ":attr(href)", content)
      if hasEnclosureSuffix(target) then
        fileTitles[k] = findOneText(selector, content)
      end
    end
    return fileTitles
  end

  -- Collect absolute attachment URLs, selecting links whose list entry (the
  -- value returned by findListHtml for that link) ends with an attachment
  -- suffix.
  -- href:    URL of the page, used to resolve relative links.
  -- content: HTML to search.
  -- Returns a table keyed by link position (can be sparse).
  function common.getEnclosureHref(href,content)
    local hrefs = {}
    for k, entry in pairs(findListHtml("a", content)) do
      if hasEnclosureSuffix(entry) then
        local target = findOneText(nthLinkSelector(k) .. ":attr(href)", content)
        hrefs[k] = absoluteEnclosureHref(href, target)
      end
    end
    return hrefs
  end

  -- Collect absolute attachment URLs, selecting links whose href attribute
  -- ends with an attachment suffix.
  -- href:    URL of the page, used to resolve relative links.
  -- content: HTML to search.
  -- Returns a table keyed by link position (can be sparse); the keys line
  -- up with those of common.getEnclosureTitle for the same content.
  function common.getEnclosureHrefByList(href,content)
    local hrefs = {}
    for k in pairs(findListHtml("a", content)) do
      local target = findOneText(nthLinkSelector(k) .. ":attr(href)", content)
      if hasEnclosureSuffix(target) then
        hrefs[k] = absoluteEnclosureHref(href, target)
      end
    end
    return hrefs
  end
  -- Download several attachments.
  -- Each link is fetched with downloadFile (method "get", empty params,
  -- headers and cookie). While the returned url is the empty string the
  -- download is repeated, up to five more times; if it is still empty after
  -- the last retry the link is reported through saveErrLog.
  --
  -- fileNameArray: names, indexed by the same keys as fileLinkArray.
  -- fileLinkArray: links to download.
  -- Returns a list of records {url, filename, size, ftype, fid} for the
  -- downloads whose url and name are both non-nil and non-empty.
  function common.getFileAttachmentsArray(fileNameArray,fileLinkArray)
    local attachments = {}
    for key, fileLink in pairs(fileLinkArray) do
      local fileName = fileNameArray[key]
      local url, name, size, ftype, fid = downloadFile(fileName, fileLink, "get", {}, {}, "")
      -- Attempts are numbered from 1 (the call above); retries are 2..6.
      for attempt = 2, 6 do
        if url ~= "" then
          break
        end
        url, name, size, ftype, fid = downloadFile(fileName, fileLink, "get", {}, {}, "")
        if attempt == 6 and url == "" then
          saveErrLog(fileLink, "comm附件下载失败")
        end
      end
      local usable = url ~= nil and url ~= "" and name ~= nil and name ~= ""
      if usable then
        table.insert(attachments, {
          ["url"] = url,
          ["filename"] = name,
          ["size"] = size,
          ["ftype"] = ftype,
          ["fid"] = fid,
        })
      end
    end
    return attachments
  end
  -- Find the attachment links in a page and download them in one step.
  --
  -- Titles come from common.getEnclosureTitle and links from
  -- common.getEnclosureHrefByList; when that finds no link at all,
  -- common.getEnclosureHref is used instead. Those helpers return tables
  -- keyed by link position that can be sparse, so emptiness is tested with
  -- next() and the entries are compacted into aligned lists (ascending
  -- position order) before downloading. Entries whose title is the empty
  -- string are dropped.
  --
  -- href:    URL of the page, used to resolve relative links.
  -- content: HTML of the page.
  -- Returns whatever common.getFileAttachmentsArray returns for the
  -- compacted title and link lists.
  function common.getFileAttachmentsArrayByHrefAndContent(href,content)
    local fileTitles = common.getEnclosureTitle(href, content)
    local fileLinks = common.getEnclosureHrefByList(href, content)
    if next(fileLinks) == nil then
      fileLinks = common.getEnclosureHref(href, content)
    end

    -- Keys are link positions (numbers), so they can be sorted directly.
    local positions = {}
    for position in pairs(fileLinks) do
      positions[#positions + 1] = position
    end
    table.sort(positions)

    local titles, links = {}, {}
    local kept = 0
    for _, position in ipairs(positions) do
      if fileTitles[position] ~= "" then
        kept = kept + 1
        titles[kept] = fileTitles[position]
        links[kept] = fileLinks[position]
      end
    end

    return common.getFileAttachmentsArray(titles, links)
  end
  -- File extensions recognised as attachments. They are tried in this order
  -- and the first hit wins, so longer extensions come before their
  -- prefixes ("docx" before "doc", "xlsx" before "xls").
  filetype={"jpg","JPG","bid","pdf","png","PDF","docx","doc","xlsx","xls","zip","rar","swf","DOCX","DOC","PDF","XLSX","XLS","ZIP","RAR","SWF"}

  -- List the attachment links found inside one section of a page.
  --
  -- href:    URL of the page, used to resolve relative links.
  -- tags:    selector of the section to search.
  -- content: HTML of the page.
  -- withend: when truthy the extension must be at the very end of the link
  --          or title (e.g. ".doc"); otherwise it may appear anywhere.
  -- Returns a list of items {href, title, ftype}: href is absolute with
  -- backslashes turned into "/", title has whitespace removed by
  -- common.trim, and ftype is the matched extension as a Lua pattern
  -- (e.g. "%.pdf"). Links with no recognised extension in either href or
  -- title, or with an empty title, are left out.
  function common.getFilesLinkByTag(href,tags,content,withend)
    local dhtml = findOneHtml(tags, content)
    local alist = findListHtml(tags.." a", content)
    local flist = {}

    -- Anchor the extension to the end of the text only when requested.
    local anchor = ""
    if withend then
      anchor = "$"
    end

    for k in pairs(alist) do
      local nth = "a:eq(" .. tostring(k-1) .. ")"
      local item = findMap({
        ["href"] = nth .. ":attr(href)",
        ["title"] = nth,
      }, dhtml)

      item["title"] = common.trim(tostring(item["title"]))
      item["href"] = common.gethref(href, tostring(item["href"]))
      item["href"] = string.gsub(item["href"], "\\", "/")
      if string.find(item["href"], "http") == nil then
        item["href"] = transCode("utf8", item["href"])
      end

      local matched
      for _, ftype in pairs(filetype) do
        local pattern = "%." .. ftype .. anchor
        -- The link is checked first, the title only when the link has no hit.
        matched = string.find(item["href"], pattern) or string.find(item["title"], pattern)
        item["ftype"] = "%." .. ftype
        if matched then
          break
        end
      end

      if matched ~= nil and item["title"] ~= "" then
        table.insert(flist, item)
      end
    end
    return flist
  end
  -- Find and download the attachments inside one section of a page.
  --
  -- The links come from common.getFilesLinkByTag. Each one is fetched with
  -- downloadFile; when the returned url is the empty string the download is
  -- retried up to two more times, and a final failure is reported through
  -- saveErrLog.
  --
  -- href, tags, content, withend: passed to common.getFilesLinkByTag.
  -- param: request parameters for downloadFile (default {}).
  -- head:  request headers for downloadFile (default {}).
  -- ck:    cookie string for downloadFile (default "").
  -- Each optional argument is defaulted on its own, so supplying only some
  -- of them does not discard the ones that were given.
  --
  -- Returns a list with one record per link:
  --   success: {url, filename, size, ftype, fid, org_url}
  --   failure: {filename, org_url}
  -- where org_url is the original (source) address of the attachment.
  function common.getFileAttachmentsArrayWithTag(href,tags,content,withend,param,head,ck)
    param = param or {}
    head = head or {}
    ck = ck or ""

    local attachments = {}
    local titleAndHrefList = common.getFilesLinkByTag(href,tags,content,withend)
    for _, v in ipairs(titleAndHrefList) do
      -- Use the title as the file name, cut after the extension when there
      -- is trailing text. When the extension was matched in the link only,
      -- the title has no such extension; fall back to the whole title so
      -- the name is never nil.
      local file_name = v["title"]
      if string.find(v["title"], v["ftype"] .. "$") == nil then
        file_name = string.match(v["title"], "(.+" .. v["ftype"] .. ")") or v["title"]
      end

      local init_url = v["href"]
      local url, name, size, ftype, fid = downloadFile(file_name, init_url, "get", param, head, ck)
      if url == "" then
        for attempt = 1, 2 do
          url, name, size, ftype, fid = downloadFile(file_name, init_url, "get", param, head, ck)
          if url ~= "" and size ~= "" then
            break
          end
          if attempt == 2 then
            saveErrLog(init_url, "comm附件下载失败")
          end
        end
      end

      if url == "" and size == "" then
        name = file_name
      end
      if type(url) ~= "string" then
        url = ""
      end

      if url ~= "" and name ~= nil and name ~= "" and size ~= "" then
        table.insert(attachments, {
          ["url"] = url,
          ["filename"] = name,
          ["size"] = size,
          ["ftype"] = ftype,
          ["fid"] = fid,
          ["org_url"] = init_url,
        })
      else
        table.insert(attachments, {
          ["filename"] = name,
          ["org_url"] = init_url,
        })
      end
    end
    return attachments
  end
  -- Remove HTML comments ("<!-- ... -->") from a string.
  --
  -- The closing "-->" is searched for after the opening "<!--", so a stray
  -- "-->" earlier in the text is left alone. An opening marker without a
  -- closing one is not a complete comment: it and the text after it are
  -- kept unchanged.
  --
  -- content: the HTML text; nil yields "".
  -- Returns the text without comments.
  function common.getPureContent(content)
    if content == nil then
      return ""
    end
    local kept = {}
    local pos = 1
    while true do
      local open = string.find(content, "<!--", pos, true)
      if open == nil then
        break
      end
      -- Start two characters in so that "<!-->" counts as an empty comment.
      local _, close = string.find(content, "-->", open + 2, true)
      if close == nil then
        break
      end
      kept[#kept + 1] = string.sub(content, pos, open - 1)
      pos = close + 1
    end
    kept[#kept + 1] = string.sub(content, pos)
    return table.concat(kept)
  end
  -- Extract an amount of money and its currency from free text.
  --
  -- Whitespace (via common.trim), parentheses and thousands separators are
  -- removed first, in both their ASCII and full-width forms. The currency
  -- is "人民币" when the text mentions it, otherwise "美元" when the text
  -- mentions that, otherwise "人民币". The first number in the text is the
  -- amount; when the text contains "万" the amount is multiplied by 10000.
  --
  -- orgStr: the text to parse.
  -- Returns two values: the amount rounded to two decimals (0 when the text
  -- contains no number) and the currency name.
  function common.getMoneyAndType(orgStr)
    local text = common.trim(orgStr)
    -- "(" and ")" are magic in Lua patterns and must be escaped.
    for _, pattern in ipairs({ "%(", "%)", ",", "(", ")", "," }) do
      text = string.gsub(text, pattern, "")
    end

    local moneyType = "人民币"
    if string.find(text, "人民币", 1, true) == nil
      and string.find(text, "美元", 1, true) ~= nil then
      moneyType = "美元"
    end

    local multiplier = 1
    if string.find(text, "万", 1, true) ~= nil then
      multiplier = 10000
    end

    -- Digits with an optional fractional part. Currency words and symbols
    -- around the number do not need to be stripped for this to work.
    local digits = string.match(text, "%d+%.?%d*")
    local num = digits and tonumber(digits)
    if num == nil then
      return 0, moneyType
    end

    local resNum = tonumber(string.format("%.2f", num * multiplier))
    return resNum, moneyType
  end
  -- Fill in defaults for the standard fields of a data record, in place.
  -- Fields "href", "title", "publishtime", "detail" and "contenthtml"
  -- become "" when nil. Field "jsondata" becomes "{}" when nil or an empty
  -- table, and is replaced by json.encode(value) when it is a non-empty
  -- table. Any other value is left as it is.
  --
  -- data: the record to normalise.
  -- Returns the same table.
  function common.dataNil(data)
    local fields = { "jsondata", "href", "title", "publishtime", "detail", "contenthtml" }
    for _, name in ipairs(fields) do
      local value = data[name]
      if value == nil then
        if name == "jsondata" then
          data[name] = "{}"
        else
          data[name] = ""
        end
      elseif name == "jsondata" and type(value) == "table" then
        if next(value) ~= nil then
          data[name] = json.encode(value)
        else
          data[name] = "{}"
        end
      end
    end
    return data
  end
  -- Resolve a detail-page link and report whether it stays on the same site
  -- as the page it was found on.
  --
  -- href:     URL of the current page.
  -- itemHref: link found on that page.
  -- Returns two values: the link made absolute by common.gethref ("" when
  -- that yields nil or ""), and the result of common.isThisWeb for links
  -- starting with "https" or "http" (false for anything else).
  function common.hrefInThisWeb(href,itemHref)
    local resolved = common.gethref(href, itemHref)
    if resolved == nil or resolved == "" then
      return "", false
    end
    -- The third argument is the position just after the scheme:
    -- 9 skips "https://", 8 skips "http://".
    if string.find(resolved, "https") == 1 then
      return resolved, common.isThisWeb(href, resolved, 9)
    end
    if string.find(resolved, "http") == 1 then
      return resolved, common.isThisWeb(href, resolved, 8)
    end
    return resolved, false
  end
  -- Report whether a link points at the same site as a page.
  -- The domain is the part of itemHref from position i up to the first "/";
  -- the link counts as same-site when that domain occurs anywhere in href.
  -- The domain is searched for as plain text, so "." and "-" in host names
  -- are matched literally rather than as Lua pattern characters.
  --
  -- href:     URL of the current page.
  -- itemHref: absolute URL of the link.
  -- i:        position in itemHref just after the scheme (8 for "http://",
  --           9 for "https://").
  -- Returns true or false.
  function common.isThisWeb(href,itemHref,i)
    local afterScheme = string.sub(itemHref, i)
    local domain = string.match(afterScheme, "^([^/]*)")
    if domain == nil or domain == "" then
      return false
    end
    return string.find(href, domain, 1, true) ~= nil
  end
  687. --通用方法结束
  688. return common;