tools.py 70 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on 2018-09-06 14:21
  4. ---------
  5. @summary: 工具
  6. ---------
  7. @author: Boris
  8. @email: boris_liu@foxmail.com
  9. """
  10. import asyncio
  11. import calendar
  12. import codecs
  13. import configparser # 读配置文件的
  14. import datetime
  15. import functools
  16. import hashlib
  17. import html
  18. import importlib
  19. import inspect
  20. import json
  21. import os
  22. import pickle
  23. import random
  24. import re
  25. import socket
  26. import ssl
  27. import string
  28. import sys
  29. import time
  30. import traceback
  31. import urllib
  32. import urllib.parse
  33. import uuid
  34. import weakref
  35. from functools import partial, wraps
  36. from hashlib import md5
  37. from pprint import pformat
  38. from pprint import pprint
  39. from urllib import request
  40. from urllib.parse import urljoin
  41. import bson
  42. import redis
  43. import requests
  44. import six
  45. from requests.cookies import RequestsCookieJar
  46. from w3lib.url import canonicalize_url as _canonicalize_url
  47. import feapder.setting as setting
  48. from feapder.db.redisdb import RedisDB
  49. from feapder.utils.email_sender import EmailSender
  50. from feapder.utils.log import log
  51. try:
  52. import execjs # pip install PyExecJS
  53. except Exception as e:
  54. pass
  55. os.environ["EXECJS_RUNTIME"] = "Node" # 设置使用node执行js
  56. # 全局取消ssl证书验证
  57. ssl._create_default_https_context = ssl._create_unverified_context
  58. TIME_OUT = 30
  59. TIMER_TIME = 5
  60. redisdb = None
  61. def get_redisdb():
  62. global redisdb
  63. if not redisdb:
  64. redisdb = RedisDB()
  65. return redisdb
  66. # 装饰器
  67. class Singleton(object):
  68. def __init__(self, cls):
  69. self._cls = cls
  70. self._instance = {}
  71. def __call__(self, *args, **kwargs):
  72. if self._cls not in self._instance:
  73. self._instance[self._cls] = self._cls(*args, **kwargs)
  74. return self._instance[self._cls]
  75. def log_function_time(func):
  76. try:
  77. @functools.wraps(func) # 将函数的原来属性付给新函数
  78. def calculate_time(*args, **kw):
  79. began_time = time.time()
  80. callfunc = func(*args, **kw)
  81. end_time = time.time()
  82. log.debug(func.__name__ + " run time = " + str(end_time - began_time))
  83. return callfunc
  84. return calculate_time
  85. except:
  86. log.debug("求取时间无效 因为函数参数不符")
  87. return func
  88. def run_safe_model(module_name):
  89. def inner_run_safe_model(func):
  90. try:
  91. @functools.wraps(func) # 将函数的原来属性付给新函数
  92. def run_func(*args, **kw):
  93. callfunc = None
  94. try:
  95. callfunc = func(*args, **kw)
  96. except Exception as e:
  97. log.error(module_name + ": " + func.__name__ + " - " + str(e))
  98. traceback.print_exc()
  99. return callfunc
  100. return run_func
  101. except Exception as e:
  102. log.error(module_name + ": " + func.__name__ + " - " + str(e))
  103. traceback.print_exc()
  104. return func
  105. return inner_run_safe_model
  106. def memoizemethod_noargs(method):
  107. """Decorator to cache the result of a method (without arguments) using a
  108. weak reference to its object
  109. """
  110. cache = weakref.WeakKeyDictionary()
  111. @functools.wraps(method)
  112. def new_method(self, *args, **kwargs):
  113. if self not in cache:
  114. cache[self] = method(self, *args, **kwargs)
  115. return cache[self]
  116. return new_method
  117. ########################【网页解析相关】###############################
  118. # @log_function_time
  119. def get_html_by_requests(
  120. url, headers=None, code="utf-8", data=None, proxies={}, with_response=False
  121. ):
  122. html = ""
  123. r = None
  124. try:
  125. if data:
  126. r = requests.post(
  127. url, headers=headers, timeout=TIME_OUT, data=data, proxies=proxies
  128. )
  129. else:
  130. r = requests.get(url, headers=headers, timeout=TIME_OUT, proxies=proxies)
  131. if code:
  132. r.encoding = code
  133. html = r.text
  134. except Exception as e:
  135. log.error(e)
  136. finally:
  137. r and r.close()
  138. if with_response:
  139. return html, r
  140. else:
  141. return html
  142. def get_json_by_requests(
  143. url,
  144. params=None,
  145. headers=None,
  146. data=None,
  147. proxies={},
  148. with_response=False,
  149. cookies=None,
  150. ):
  151. json = {}
  152. response = None
  153. try:
  154. # response = requests.get(url, params = params)
  155. if data:
  156. response = requests.post(
  157. url,
  158. headers=headers,
  159. data=data,
  160. params=params,
  161. timeout=TIME_OUT,
  162. proxies=proxies,
  163. cookies=cookies,
  164. )
  165. else:
  166. response = requests.get(
  167. url,
  168. headers=headers,
  169. params=params,
  170. timeout=TIME_OUT,
  171. proxies=proxies,
  172. cookies=cookies,
  173. )
  174. response.encoding = "utf-8"
  175. json = response.json()
  176. except Exception as e:
  177. log.error(e)
  178. finally:
  179. response and response.close()
  180. if with_response:
  181. return json, response
  182. else:
  183. return json
  184. def get_cookies(response):
  185. cookies = requests.utils.dict_from_cookiejar(response.cookies)
  186. return cookies
  187. def get_cookies_from_str(cookie_str):
  188. """
  189. >>> get_cookies_from_str("key=value; key2=value2; key3=; key4=; ")
  190. {'key': 'value', 'key2': 'value2', 'key3': '', 'key4': ''}
  191. Args:
  192. cookie_str: key=value; key2=value2; key3=; key4=
  193. Returns:
  194. """
  195. cookies = {}
  196. for cookie in cookie_str.split(";"):
  197. cookie = cookie.strip()
  198. if not cookie:
  199. continue
  200. key, value = cookie.split("=", 1)
  201. key = key.strip()
  202. value = value.strip()
  203. cookies[key] = value
  204. return cookies
  205. def get_cookies_jar(cookies):
  206. """
  207. @summary: 适用于selenium生成的cookies转requests的cookies
  208. requests.get(xxx, cookies=jar)
  209. 参考:https://www.cnblogs.com/small-bud/p/9064674.html
  210. ---------
  211. @param cookies: [{},{}]
  212. ---------
  213. @result: cookie jar
  214. """
  215. cookie_jar = RequestsCookieJar()
  216. for cookie in cookies:
  217. cookie_jar.set(cookie["name"], cookie["value"])
  218. return cookie_jar
  219. def get_cookies_from_selenium_cookie(cookies):
  220. """
  221. @summary: 适用于selenium生成的cookies转requests的cookies
  222. requests.get(xxx, cookies=jar)
  223. 参考:https://www.cnblogs.com/small-bud/p/9064674.html
  224. ---------
  225. @param cookies: [{},{}]
  226. ---------
  227. @result: cookie jar
  228. """
  229. cookie_dict = {}
  230. for cookie in cookies:
  231. if cookie.get("name"):
  232. cookie_dict[cookie["name"]] = cookie["value"]
  233. return cookie_dict
  234. def cookiesjar2str(cookies):
  235. str_cookie = ""
  236. for k, v in requests.utils.dict_from_cookiejar(cookies).items():
  237. str_cookie += k
  238. str_cookie += "="
  239. str_cookie += v
  240. str_cookie += "; "
  241. return str_cookie
  242. def cookies2str(cookies):
  243. str_cookie = ""
  244. for k, v in cookies.items():
  245. str_cookie += k
  246. str_cookie += "="
  247. str_cookie += v
  248. str_cookie += "; "
  249. return str_cookie
  250. def get_urls(
  251. html,
  252. stop_urls=(
  253. "javascript",
  254. "+",
  255. ".css",
  256. ".js",
  257. ".rar",
  258. ".xls",
  259. ".exe",
  260. ".apk",
  261. ".doc",
  262. ".jpg",
  263. ".png",
  264. ".flv",
  265. ".mp4",
  266. ),
  267. ):
  268. # 不匹配javascript、 +、 # 这样的url
  269. regex = r'<a.*?href.*?=.*?["|\'](.*?)["|\']'
  270. urls = get_info(html, regex)
  271. urls = sorted(set(urls), key=urls.index)
  272. if stop_urls:
  273. stop_urls = isinstance(stop_urls, str) and [stop_urls] or stop_urls
  274. use_urls = []
  275. for url in urls:
  276. for stop_url in stop_urls:
  277. if stop_url in url:
  278. break
  279. else:
  280. use_urls.append(url)
  281. urls = use_urls
  282. return urls
  283. def get_full_url(root_url, sub_url):
  284. """
  285. @summary: 得到完整的ur
  286. ---------
  287. @param root_url: 根url (网页的url)
  288. @param sub_url: 子url (带有相对路径的 可以拼接成完整的)
  289. ---------
  290. @result: 返回完整的url
  291. """
  292. return urljoin(root_url, sub_url)
  293. def joint_url(url, params):
  294. # param_str = "?"
  295. # for key, value in params.items():
  296. # value = isinstance(value, str) and value or str(value)
  297. # param_str += key + "=" + value + "&"
  298. #
  299. # return url + param_str[:-1]
  300. if not params:
  301. return url
  302. params = urlencode(params)
  303. separator = "?" if "?" not in url else "&"
  304. return url + separator + params
  305. def canonicalize_url(url):
  306. """
  307. url 归一化 会参数排序 及去掉锚点
  308. """
  309. return _canonicalize_url(url)
  310. def get_url_md5(url):
  311. url = canonicalize_url(url)
  312. url = re.sub("^http://", "https://", url)
  313. return get_md5(url)
  314. def fit_url(urls, identis):
  315. identis = isinstance(identis, str) and [identis] or identis
  316. fit_urls = []
  317. for link in urls:
  318. for identi in identis:
  319. if identi in link:
  320. fit_urls.append(link)
  321. return list(set(fit_urls))
  322. def get_param(url, key):
  323. params = url.split("?")[-1].split("&")
  324. for param in params:
  325. key_value = param.split("=", 1)
  326. if key == key_value[0]:
  327. return key_value[1]
  328. return None
  329. def urlencode(params):
  330. """
  331. 字典类型的参数转为字符串
  332. @param params:
  333. {
  334. 'a': 1,
  335. 'b': 2
  336. }
  337. @return: a=1&b=2
  338. """
  339. return urllib.parse.urlencode(params)
  340. def urldecode(url):
  341. """
  342. 将字符串类型的参数转为json
  343. @param url: xxx?a=1&b=2
  344. @return:
  345. {
  346. 'a': 1,
  347. 'b': 2
  348. }
  349. """
  350. params_json = {}
  351. params = url.split("?")[-1].split("&")
  352. for param in params:
  353. key, value = param.split("=")
  354. params_json[key] = unquote_url(value)
  355. return params_json
  356. def unquote_url(url, encoding="utf-8"):
  357. """
  358. @summary: 将url解码
  359. ---------
  360. @param url:
  361. ---------
  362. @result:
  363. """
  364. return urllib.parse.unquote(url, encoding=encoding)
  365. def quote_url(url, encoding="utf-8"):
  366. """
  367. @summary: 将url编码 编码意思http://www.w3school.com.cn/tags/html_ref_urlencode.html
  368. ---------
  369. @param url:
  370. ---------
  371. @result:
  372. """
  373. return urllib.parse.quote(url, safe="%;/?:@&=+$,", encoding=encoding)
  374. def quote_chinese_word(text, encoding="utf-8"):
  375. def quote_chinese_word_func(text):
  376. chinese_word = text.group(0)
  377. return urllib.parse.quote(chinese_word, encoding=encoding)
  378. return re.sub("([\u4e00-\u9fa5]+)", quote_chinese_word_func, text, flags=re.S)
  379. def unescape(str):
  380. """
  381. 反转译
  382. """
  383. return html.unescape(str)
  384. def excape(str):
  385. """
  386. 转译
  387. """
  388. return html.escape(str)
  389. _regexs = {}
  390. # @log_function_time
  391. def get_info(html, regexs, allow_repeat=True, fetch_one=False, split=None):
  392. regexs = isinstance(regexs, str) and [regexs] or regexs
  393. infos = []
  394. for regex in regexs:
  395. if regex == "":
  396. continue
  397. if regex not in _regexs.keys():
  398. _regexs[regex] = re.compile(regex, re.S)
  399. if fetch_one:
  400. infos = _regexs[regex].search(html)
  401. if infos:
  402. infos = infos.groups()
  403. else:
  404. continue
  405. else:
  406. infos = _regexs[regex].findall(str(html))
  407. if len(infos) > 0:
  408. # print(regex)
  409. break
  410. if fetch_one:
  411. infos = infos if infos else ("",)
  412. return infos if len(infos) > 1 else infos[0]
  413. else:
  414. infos = allow_repeat and infos or sorted(set(infos), key=infos.index)
  415. infos = split.join(infos) if split else infos
  416. return infos
  417. def table_json(table, save_one_blank=True):
  418. """
  419. 将表格转为json 适应于 key:value 在一行类的表格
  420. @param table: 使用selector封装后的具有xpath的selector
  421. @param save_one_blank: 保留一个空白符
  422. @return:
  423. """
  424. data = {}
  425. trs = table.xpath(".//tr")
  426. for tr in trs:
  427. tds = tr.xpath("./td|./th")
  428. for i in range(0, len(tds), 2):
  429. if i + 1 > len(tds) - 1:
  430. break
  431. key = tds[i].xpath("string(.)").extract_first(default="").strip()
  432. value = tds[i + 1].xpath("string(.)").extract_first(default="").strip()
  433. value = replace_str(value, "[\f\n\r\t\v]", "")
  434. value = replace_str(value, " +", " " if save_one_blank else "")
  435. if key:
  436. data[key] = value
  437. return data
  438. def get_table_row_data(table):
  439. """
  440. 获取表格里每一行数据
  441. @param table: 使用selector封装后的具有xpath的selector
  442. @return: [[],[]..]
  443. """
  444. datas = []
  445. rows = table.xpath(".//tr")
  446. for row in rows:
  447. cols = row.xpath("./td|./th")
  448. row_datas = []
  449. for col in cols:
  450. data = col.xpath("string(.)").extract_first(default="").strip()
  451. row_datas.append(data)
  452. datas.append(row_datas)
  453. return datas
  454. def rows2json(rows, keys=None):
  455. """
  456. 将行数据转为json
  457. @param rows: 每一行的数据
  458. @param keys: json的key,空时将rows的第一行作为key
  459. @return:
  460. """
  461. data_start_pos = 0 if keys else 1
  462. datas = []
  463. keys = keys or rows[0]
  464. for values in rows[data_start_pos:]:
  465. datas.append(dict(zip(keys, values)))
  466. return datas
  467. def get_form_data(form):
  468. """
  469. 提取form中提交的数据
  470. :param form: 使用selector封装后的具有xpath的selector
  471. :return:
  472. """
  473. data = {}
  474. inputs = form.xpath(".//input")
  475. for input in inputs:
  476. name = input.xpath("./@name").extract_first()
  477. value = input.xpath("./@value").extract_first()
  478. if name:
  479. data[name] = value
  480. return data
  481. def get_domain(url):
  482. return urllib.parse.urlparse(url).netloc
  483. def get_index_url(url):
  484. return "/".join(url.split("/")[:3])
  485. def get_ip(domain):
  486. ip = socket.getaddrinfo(domain, "http")[0][4][0]
  487. return ip
  488. def get_localhost_ip():
  489. """
  490. 利用 UDP 协议来实现的,生成一个UDP包,把自己的 IP 放如到 UDP 协议头中,然后从UDP包中获取本机的IP。
  491. 这个方法并不会真实的向外部发包,所以用抓包工具是看不到的
  492. :return:
  493. """
  494. s = None
  495. try:
  496. s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
  497. s.connect(("8.8.8.8", 80))
  498. ip = s.getsockname()[0]
  499. finally:
  500. if s:
  501. s.close()
  502. return ip
  503. def ip_to_num(ip):
  504. import struct
  505. ip_num = socket.ntohl(struct.unpack("I", socket.inet_aton(str(ip)))[0])
  506. return ip_num
  507. def is_valid_proxy(proxy, check_url=None):
  508. """
  509. 检验代理是否有效
  510. @param proxy: xxx.xxx.xxx:xxx
  511. @param check_url: 利用目标网站检查,目标网站url。默认为None, 使用代理服务器的socket检查, 但不能排除Connection closed by foreign host
  512. @return: True / False
  513. """
  514. is_valid = False
  515. if check_url:
  516. proxies = {"http": f"http://{proxy}", "https": f"https://{proxy}"}
  517. headers = {
  518. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
  519. }
  520. response = None
  521. try:
  522. response = requests.get(
  523. check_url, headers=headers, proxies=proxies, stream=True, timeout=20
  524. )
  525. is_valid = True
  526. except Exception as e:
  527. log.error("check proxy failed: {} {}".format(e, proxy))
  528. finally:
  529. if response:
  530. response.close()
  531. else:
  532. ip, port = proxy.split(":")
  533. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sk:
  534. sk.settimeout(7)
  535. try:
  536. sk.connect((ip, int(port))) # 检查代理服务器是否开着
  537. is_valid = True
  538. except Exception as e:
  539. log.error("check proxy failed: {} {}:{}".format(e, ip, port))
  540. return is_valid
  541. def is_valid_url(url):
  542. """
  543. 验证url是否合法
  544. :param url:
  545. :return:
  546. """
  547. if re.match(r"(^https?:/{2}\w.+$)|(ftp://)", url):
  548. return True
  549. else:
  550. return False
  551. def get_text(soup, *args):
  552. try:
  553. return soup.get_text()
  554. except Exception as e:
  555. log.error(e)
  556. return ""
  557. def del_html_tag(content, except_line_break=False, save_img=False, white_replaced=""):
  558. """
  559. 删除html标签
  560. @param content: html内容
  561. @param except_line_break: 保留p标签
  562. @param save_img: 保留图片
  563. @param white_replaced: 空白符替换
  564. @return:
  565. """
  566. content = replace_str(content, "(?i)<script(.|\n)*?</script>") # (?)忽略大小写
  567. content = replace_str(content, "(?i)<style(.|\n)*?</style>")
  568. content = replace_str(content, "<!--(.|\n)*?-->")
  569. content = replace_str(
  570. content, "(?!&[a-z]+=)&[a-z]+;?"
  571. ) # 干掉&nbsp等无用的字符 但&xxx= 这种表示参数的除外
  572. if except_line_break:
  573. content = content.replace("</p>", "/p")
  574. content = replace_str(content, "<[^p].*?>")
  575. content = content.replace("/p", "</p>")
  576. content = replace_str(content, "[ \f\r\t\v]")
  577. elif save_img:
  578. content = replace_str(content, "(?!<img.+?>)<.+?>") # 替换掉除图片外的其他标签
  579. content = replace_str(content, "(?! +)\s+", "\n") # 保留空格
  580. content = content.strip()
  581. else:
  582. content = replace_str(content, "<(.|\n)*?>")
  583. content = replace_str(content, "\s", white_replaced)
  584. content = content.strip()
  585. return content
  586. def del_html_js_css(content):
  587. content = replace_str(content, "(?i)<script(.|\n)*?</script>") # (?)忽略大小写
  588. content = replace_str(content, "(?i)<style(.|\n)*?</style>")
  589. content = replace_str(content, "<!--(.|\n)*?-->")
  590. return content
  591. def is_have_chinese(content):
  592. regex = "[\u4e00-\u9fa5]+"
  593. chinese_word = get_info(content, regex)
  594. return chinese_word and True or False
  595. def is_have_english(content):
  596. regex = "[a-zA-Z]+"
  597. english_words = get_info(content, regex)
  598. return english_words and True or False
  599. def get_chinese_word(content):
  600. regex = "[\u4e00-\u9fa5]+"
  601. chinese_word = get_info(content, regex)
  602. return chinese_word
  603. def get_english_words(content):
  604. regex = "[a-zA-Z]+"
  605. english_words = get_info(content, regex)
  606. return english_words or ""
  607. ##################################################
  608. def get_json(json_str):
  609. """
  610. @summary: 取json对象
  611. ---------
  612. @param json_str: json格式的字符串
  613. ---------
  614. @result: 返回json对象
  615. """
  616. try:
  617. return json.loads(json_str) if json_str else {}
  618. except Exception as e1:
  619. try:
  620. json_str = json_str.strip()
  621. json_str = json_str.replace("'", '"')
  622. keys = get_info(json_str, "(\w+):")
  623. for key in keys:
  624. json_str = json_str.replace(key, '"%s"' % key)
  625. return json.loads(json_str) if json_str else {}
  626. except Exception as e2:
  627. log.error(
  628. """
  629. e1: %s
  630. format json_str: %s
  631. e2: %s
  632. """
  633. % (e1, json_str, e2)
  634. )
  635. return {}
  636. def jsonp2json(jsonp):
  637. """
  638. 将jsonp转为json
  639. @param jsonp: jQuery172013600082560040794_1553230569815({})
  640. @return:
  641. """
  642. try:
  643. return json.loads(re.match(".*?({.*}).*", jsonp, re.S).group(1))
  644. except:
  645. raise ValueError("Invalid Input")
  646. def dumps_json(data, indent=4, sort_keys=False):
  647. """
  648. @summary: 格式化json 用于打印
  649. ---------
  650. @param data: json格式的字符串或json对象
  651. ---------
  652. @result: 格式化后的字符串
  653. """
  654. try:
  655. if isinstance(data, str):
  656. data = get_json(data)
  657. data = json.dumps(
  658. data,
  659. ensure_ascii=False,
  660. indent=indent,
  661. skipkeys=True,
  662. sort_keys=sort_keys,
  663. default=str,
  664. )
  665. except Exception as e:
  666. data = pformat(data)
  667. return data
  668. def get_json_value(json_object, key):
  669. """
  670. @summary:
  671. ---------
  672. @param json_object: json对象或json格式的字符串
  673. @param key: 建值 如果在多个层级目录下 可写 key1.key2 如{'key1':{'key2':3}}
  674. ---------
  675. @result: 返回对应的值,如果没有,返回''
  676. """
  677. current_key = ""
  678. value = ""
  679. try:
  680. json_object = (
  681. isinstance(json_object, str) and get_json(json_object) or json_object
  682. )
  683. current_key = key.split(".")[0]
  684. value = json_object[current_key]
  685. key = key[key.find(".") + 1 :]
  686. except Exception as e:
  687. return value
  688. if key == current_key:
  689. return value
  690. else:
  691. return get_json_value(value, key)
  692. def get_all_keys(datas, depth=None, current_depth=0):
  693. """
  694. @summary: 获取json李所有的key
  695. ---------
  696. @param datas: dict / list
  697. @param depth: 字典key的层级 默认不限制层级 层级从1开始
  698. @param current_depth: 字典key的当前层级 不用传参
  699. ---------
  700. @result: 返回json所有的key
  701. """
  702. keys = []
  703. if depth and current_depth >= depth:
  704. return keys
  705. if isinstance(datas, list):
  706. for data in datas:
  707. keys.extend(get_all_keys(data, depth, current_depth=current_depth + 1))
  708. elif isinstance(datas, dict):
  709. for key, value in datas.items():
  710. keys.append(key)
  711. if isinstance(value, dict):
  712. keys.extend(get_all_keys(value, depth, current_depth=current_depth + 1))
  713. return keys
  714. def to_chinese(unicode_str):
  715. format_str = json.loads('{"chinese":"%s"}' % unicode_str)
  716. return format_str["chinese"]
  717. ##################################################
  718. def replace_str(source_str, regex, replace_str=""):
  719. """
  720. @summary: 替换字符串
  721. ---------
  722. @param source_str: 原字符串
  723. @param regex: 正则
  724. @param replace_str: 用什么来替换 默认为''
  725. ---------
  726. @result: 返回替换后的字符串
  727. """
  728. str_info = re.compile(regex)
  729. return str_info.sub(replace_str, source_str)
  730. def del_redundant_blank_character(text):
  731. """
  732. 删除冗余的空白符, 只保留一个
  733. :param text:
  734. :return:
  735. """
  736. return re.sub("\s+", " ", text)
  737. ##################################################
  738. def get_conf_value(config_file, section, key):
  739. cp = configparser.ConfigParser(allow_no_value=True)
  740. with codecs.open(config_file, "r", encoding="utf-8") as f:
  741. cp.read_file(f)
  742. return cp.get(section, key)
  743. def mkdir(path):
  744. try:
  745. if not os.path.exists(path):
  746. os.makedirs(path)
  747. except OSError as exc: # Python >2.5
  748. pass
  749. def write_file(filename, content, mode="w", encoding="utf-8"):
  750. """
  751. @summary: 写文件
  752. ---------
  753. @param filename: 文件名(有路径)
  754. @param content: 内容
  755. @param mode: 模式 w/w+ (覆盖/追加)
  756. ---------
  757. @result:
  758. """
  759. directory = os.path.dirname(filename)
  760. mkdir(directory)
  761. with open(filename, mode, encoding=encoding) as file:
  762. file.writelines(content)
  763. def read_file(filename, readlines=False, encoding="utf-8"):
  764. """
  765. @summary: 读文件
  766. ---------
  767. @param filename: 文件名(有路径)
  768. @param readlines: 按行读取 (默认False)
  769. ---------
  770. @result: 按行读取返回List,否则返回字符串
  771. """
  772. content = None
  773. try:
  774. with open(filename, "r", encoding=encoding) as file:
  775. content = file.readlines() if readlines else file.read()
  776. except Exception as e:
  777. log.error(e)
  778. return content
  779. def get_oss_file_list(oss_handler, prefix, date_range_min, date_range_max=None):
  780. """
  781. 获取文件列表
  782. @param prefix: 路径前缀 如 data/car_service_line/yiche/yiche_serial_zongshu_info
  783. @param date_range_min: 时间范围 最小值 日期分隔符为/ 如 2019/03/01 或 2019/03/01/00/00/00
  784. @param date_range_max: 时间范围 最大值 日期分隔符为/ 如 2019/03/01 或 2019/03/01/00/00/00
  785. @return: 每个文件路径 如 html/e_commerce_service_line/alibaba/alibaba_shop_info/2019/03/22/15/53/15/8ca8b9e4-4c77-11e9-9dee-acde48001122.json.snappy
  786. """
  787. # 计算时间范围
  788. date_range_max = date_range_max or date_range_min
  789. date_format = "/".join(
  790. ["%Y", "%m", "%d", "%H", "%M", "%S"][: date_range_min.count("/") + 1]
  791. )
  792. time_interval = [
  793. {"days": 365},
  794. {"days": 31},
  795. {"days": 1},
  796. {"hours": 1},
  797. {"minutes": 1},
  798. {"seconds": 1},
  799. ][date_range_min.count("/")]
  800. date_range = get_between_date(
  801. date_range_min, date_range_max, date_format=date_format, **time_interval
  802. )
  803. for date in date_range:
  804. file_folder_path = os.path.join(prefix, date)
  805. objs = oss_handler.list(prefix=file_folder_path)
  806. for obj in objs:
  807. filename = obj.key
  808. yield filename
  809. def is_html(url):
  810. if not url:
  811. return False
  812. try:
  813. content_type = request.urlopen(url).info().get("Content-Type", "")
  814. if "text/html" in content_type:
  815. return True
  816. else:
  817. return False
  818. except Exception as e:
  819. log.error(e)
  820. return False
  821. def is_exist(file_path):
  822. """
  823. @summary: 文件是否存在
  824. ---------
  825. @param file_path:
  826. ---------
  827. @result:
  828. """
  829. return os.path.exists(file_path)
  830. def download_file(url, file_path, *, call_func=None, proxies=None, data=None):
  831. """
  832. 下载文件,会自动创建文件存储目录
  833. Args:
  834. url: 地址
  835. file_path: 文件存储地址
  836. call_func: 下载成功的回调
  837. proxies: 代理
  838. data: 请求体
  839. Returns:
  840. """
  841. directory = os.path.dirname(file_path)
  842. mkdir(directory)
  843. # 进度条
  844. def progress_callfunc(blocknum, blocksize, totalsize):
  845. """回调函数
  846. @blocknum : 已经下载的数据块
  847. @blocksize : 数据块的大小
  848. @totalsize: 远程文件的大小
  849. """
  850. percent = 100.0 * blocknum * blocksize / totalsize
  851. if percent > 100:
  852. percent = 100
  853. # print ('进度条 %.2f%%' % percent, end = '\r')
  854. sys.stdout.write("进度条 %.2f%%" % percent + "\r")
  855. sys.stdout.flush()
  856. if url:
  857. try:
  858. if proxies:
  859. # create the object, assign it to a variable
  860. proxy = request.ProxyHandler(proxies)
  861. # construct a new opener using your proxy settings
  862. opener = request.build_opener(proxy)
  863. # install the openen on the module-level
  864. request.install_opener(opener)
  865. request.urlretrieve(url, file_path, progress_callfunc, data)
  866. if callable(call_func):
  867. call_func()
  868. return 1
  869. except Exception as e:
  870. log.error(e)
  871. return 0
  872. else:
  873. return 0
  874. def get_file_list(path, ignore=[]):
  875. templist = path.split("*")
  876. path = templist[0]
  877. file_type = templist[1] if len(templist) >= 2 else ""
  878. # 递归遍历文件
  879. def get_file_list_(path, file_type, ignore, all_file=[]):
  880. file_list = os.listdir(path)
  881. for file_name in file_list:
  882. if file_name in ignore:
  883. continue
  884. file_path = os.path.join(path, file_name)
  885. if os.path.isdir(file_path):
  886. get_file_list_(file_path, file_type, ignore, all_file)
  887. else:
  888. if not file_type or file_name.endswith(file_type):
  889. all_file.append(file_path)
  890. return all_file
  891. return get_file_list_(path, file_type, ignore) if os.path.isdir(path) else [path]
  892. def rename_file(old_name, new_name):
  893. os.rename(old_name, new_name)
  894. def del_file(path, ignore=()):
  895. files = get_file_list(path, ignore)
  896. for file in files:
  897. try:
  898. os.remove(file)
  899. except Exception as e:
  900. log.error(
  901. """
  902. 删除出错: %s
  903. Exception : %s
  904. """
  905. % (file, str(e))
  906. )
  907. finally:
  908. pass
  909. def get_file_type(file_name):
  910. """
  911. @summary: 取文件后缀名
  912. ---------
  913. @param file_name:
  914. ---------
  915. @result:
  916. """
  917. try:
  918. return os.path.splitext(file_name)[1]
  919. except Exception as e:
  920. log.exception(e)
  921. def get_file_path(file_path):
  922. """
  923. @summary: 取文件路径
  924. ---------
  925. @param file_path: /root/a.py
  926. ---------
  927. @result: /root
  928. """
  929. try:
  930. return os.path.split(file_path)[0]
  931. except Exception as e:
  932. log.exception(e)
  933. #############################################
  934. def exec_js(js_code):
  935. """
  936. @summary: 执行js代码
  937. ---------
  938. @param js_code: js代码
  939. ---------
  940. @result: 返回执行结果
  941. """
  942. return execjs.eval(js_code)
  943. def compile_js(js_func):
  944. """
  945. @summary: 编译js函数
  946. ---------
  947. @param js_func:js函数
  948. ---------
  949. @result: 返回函数对象 调用 fun('js_funName', param1,param2)
  950. """
  951. ctx = execjs.compile(js_func)
  952. return ctx.call
  953. ###############################################
  954. #############################################
  955. def date_to_timestamp(date, time_format="%Y-%m-%d %H:%M:%S"):
  956. """
  957. @summary:
  958. ---------
  959. @param date:将"2011-09-28 10:00:00"时间格式转化为时间戳
  960. @param format:时间格式
  961. ---------
  962. @result: 返回时间戳
  963. """
  964. timestamp = time.mktime(time.strptime(date, time_format))
  965. return int(timestamp)
  966. def timestamp_to_date(timestamp, time_format="%Y-%m-%d %H:%M:%S"):
  967. """
  968. @summary:
  969. ---------
  970. @param timestamp: 将时间戳转化为日期
  971. @param format: 日期格式
  972. ---------
  973. @result: 返回日期
  974. """
  975. if timestamp is None:
  976. raise ValueError("timestamp is null")
  977. date = time.localtime(timestamp)
  978. return time.strftime(time_format, date)
  979. def get_current_timestamp():
  980. return int(time.time())
  981. def get_current_date(date_format="%Y-%m-%d %H:%M:%S"):
  982. return datetime.datetime.now().strftime(date_format)
  983. # return time.strftime(date_format, time.localtime(time.time()))
  984. def get_date_number(year=None, month=None, day=None):
  985. """
  986. @summary: 获取指定日期对应的日期数
  987. 默认当前周
  988. ---------
  989. @param year: 2010
  990. @param month: 6
  991. @param day: 16
  992. ---------
  993. @result: (年号,第几周,第几天) 如 (2010, 24, 3)
  994. """
  995. if year and month and day:
  996. return datetime.date(year, month, day).isocalendar()
  997. elif not any([year, month, day]):
  998. return datetime.datetime.now().isocalendar()
  999. else:
  1000. assert year, "year 不能为空"
  1001. assert month, "month 不能为空"
  1002. assert day, "day 不能为空"
  1003. def get_between_date(
  1004. begin_date, end_date=None, date_format="%Y-%m-%d", **time_interval
  1005. ):
  1006. """
  1007. @summary: 获取一段时间间隔内的日期,默认为每一天
  1008. ---------
  1009. @param begin_date: 开始日期 str 如 2018-10-01
  1010. @param end_date: 默认为今日
  1011. @param date_format: 日期格式,应与begin_date的日期格式相对应
  1012. @param time_interval: 时间间隔 默认一天 支持 days、seconds、microseconds、milliseconds、minutes、hours、weeks
  1013. ---------
  1014. @result: list 值为字符串
  1015. """
  1016. date_list = []
  1017. begin_date = datetime.datetime.strptime(begin_date, date_format)
  1018. end_date = (
  1019. datetime.datetime.strptime(end_date, date_format)
  1020. if end_date
  1021. else datetime.datetime.strptime(
  1022. time.strftime(date_format, time.localtime(time.time())), date_format
  1023. )
  1024. )
  1025. time_interval = time_interval or dict(days=1)
  1026. while begin_date <= end_date:
  1027. date_str = begin_date.strftime(date_format)
  1028. date_list.append(date_str)
  1029. begin_date += datetime.timedelta(**time_interval)
  1030. if end_date.strftime(date_format) not in date_list:
  1031. date_list.append(end_date.strftime(date_format))
  1032. return date_list
  1033. def get_between_months(begin_date, end_date=None):
  1034. """
  1035. @summary: 获取一段时间间隔内的月份
  1036. 需要满一整月
  1037. ---------
  1038. @param begin_date: 开始时间 如 2018-01-01
  1039. @param end_date: 默认当前时间
  1040. ---------
  1041. @result: 列表 如 ['2018-01', '2018-02']
  1042. """
  1043. def add_months(dt, months):
  1044. month = dt.month - 1 + months
  1045. year = dt.year + month // 12
  1046. month = month % 12 + 1
  1047. day = min(dt.day, calendar.monthrange(year, month)[1])
  1048. return dt.replace(year=year, month=month, day=day)
  1049. date_list = []
  1050. begin_date = datetime.datetime.strptime(begin_date, "%Y-%m-%d")
  1051. end_date = (
  1052. datetime.datetime.strptime(end_date, "%Y-%m-%d")
  1053. if end_date
  1054. else datetime.datetime.strptime(
  1055. time.strftime("%Y-%m-%d", time.localtime(time.time())), "%Y-%m-%d"
  1056. )
  1057. )
  1058. while begin_date <= end_date:
  1059. date_str = begin_date.strftime("%Y-%m")
  1060. date_list.append(date_str)
  1061. begin_date = add_months(begin_date, 1)
  1062. return date_list
  1063. def get_today_of_day(day_offset=0):
  1064. return str(datetime.date.today() + datetime.timedelta(days=day_offset))
  1065. def get_days_of_month(year, month):
  1066. """
  1067. 返回天数
  1068. """
  1069. return calendar.monthrange(year, month)[1]
  1070. def get_firstday_of_month(date):
  1071. """''
  1072. date format = "YYYY-MM-DD"
  1073. """
  1074. year, month, day = date.split("-")
  1075. year, month, day = int(year), int(month), int(day)
  1076. days = "01"
  1077. if int(month) < 10:
  1078. month = "0" + str(int(month))
  1079. arr = (year, month, days)
  1080. return "-".join("%s" % i for i in arr)
  1081. def get_lastday_of_month(date):
  1082. """''
  1083. get the last day of month
  1084. date format = "YYYY-MM-DD"
  1085. """
  1086. year, month, day = date.split("-")
  1087. year, month, day = int(year), int(month), int(day)
  1088. days = calendar.monthrange(year, month)[1]
  1089. month = add_zero(month)
  1090. arr = (year, month, days)
  1091. return "-".join("%s" % i for i in arr)
  1092. def get_firstday_month(month_offset=0):
  1093. """''
  1094. get the first day of month from today
  1095. month_offset is how many months
  1096. """
  1097. (y, m, d) = get_year_month_and_days(month_offset)
  1098. d = "01"
  1099. arr = (y, m, d)
  1100. return "-".join("%s" % i for i in arr)
  1101. def get_lastday_month(month_offset=0):
  1102. """''
  1103. get the last day of month from today
  1104. month_offset is how many months
  1105. """
  1106. return "-".join("%s" % i for i in get_year_month_and_days(month_offset))
  1107. def get_last_month(month_offset=0):
  1108. """''
  1109. get the last day of month from today
  1110. month_offset is how many months
  1111. """
  1112. return "-".join("%s" % i for i in get_year_month_and_days(month_offset)[:2])
  1113. def get_year_month_and_days(month_offset=0):
  1114. """
  1115. @summary:
  1116. ---------
  1117. @param month_offset: 月份偏移量
  1118. ---------
  1119. @result: ('2019', '04', '30')
  1120. """
  1121. today = datetime.datetime.now()
  1122. year, month = today.year, today.month
  1123. this_year = int(year)
  1124. this_month = int(month)
  1125. total_month = this_month + month_offset
  1126. if month_offset >= 0:
  1127. if total_month <= 12:
  1128. days = str(get_days_of_month(this_year, total_month))
  1129. total_month = add_zero(total_month)
  1130. return (year, total_month, days)
  1131. else:
  1132. i = total_month // 12
  1133. j = total_month % 12
  1134. if j == 0:
  1135. i -= 1
  1136. j = 12
  1137. this_year += i
  1138. days = str(get_days_of_month(this_year, j))
  1139. j = add_zero(j)
  1140. return (str(this_year), str(j), days)
  1141. else:
  1142. if (total_month > 0) and (total_month < 12):
  1143. days = str(get_days_of_month(this_year, total_month))
  1144. total_month = add_zero(total_month)
  1145. return (year, total_month, days)
  1146. else:
  1147. i = total_month // 12
  1148. j = total_month % 12
  1149. if j == 0:
  1150. i -= 1
  1151. j = 12
  1152. this_year += i
  1153. days = str(get_days_of_month(this_year, j))
  1154. j = add_zero(j)
  1155. return (str(this_year), str(j), days)
  1156. def add_zero(n):
  1157. return "%02d" % n
  1158. def get_month(month_offset=0):
  1159. """''
  1160. 获取当前日期前后N月的日期
  1161. if month_offset>0, 获取当前日期前N月的日期
  1162. if month_offset<0, 获取当前日期后N月的日期
  1163. date format = "YYYY-MM-DD"
  1164. """
  1165. today = datetime.datetime.now()
  1166. day = add_zero(today.day)
  1167. (y, m, d) = get_year_month_and_days(month_offset)
  1168. arr = (y, m, d)
  1169. if int(day) < int(d):
  1170. arr = (y, m, day)
  1171. return "-".join("%s" % i for i in arr)
  1172. @run_safe_model("format_date")
  1173. def format_date(date, old_format="", new_format="%Y-%m-%d %H:%M:%S"):
  1174. """
  1175. @summary: 格式化日期格式
  1176. ---------
  1177. @param date: 日期 eg:2017年4月17日 3时27分12秒
  1178. @param old_format: 原来的日期格式 如 '%Y年%m月%d日 %H时%M分%S秒'
  1179. %y 两位数的年份表示(00-99)
  1180. %Y 四位数的年份表示(000-9999)
  1181. %m 月份(01-12)
  1182. %d 月内中的一天(0-31)
  1183. %H 24小时制小时数(0-23)
  1184. %I 12小时制小时数(01-12)
  1185. %M 分钟数(00-59)
  1186. %S 秒(00-59)
  1187. @param new_format: 输出的日期格式
  1188. ---------
  1189. @result: 格式化后的日期,类型为字符串 如2017-4-17 03:27:12
  1190. """
  1191. if not date:
  1192. return ""
  1193. if not old_format:
  1194. regex = "(\d+)"
  1195. numbers = get_info(date, regex, allow_repeat=True)
  1196. formats = ["%Y", "%m", "%d", "%H", "%M", "%S"]
  1197. old_format = date
  1198. for i, number in enumerate(numbers[:6]):
  1199. if i == 0 and len(number) == 2: # 年份可能是两位 用小%y
  1200. old_format = old_format.replace(
  1201. number, formats[i].lower(), 1
  1202. ) # 替换一次 '2017年11月30日 11:49' 防止替换11月时,替换11小时
  1203. else:
  1204. old_format = old_format.replace(number, formats[i], 1) # 替换一次
  1205. try:
  1206. date_obj = datetime.datetime.strptime(date, old_format)
  1207. if "T" in date and "Z" in date:
  1208. date_obj += datetime.timedelta(hours=8)
  1209. date_str = date_obj.strftime("%Y-%m-%d %H:%M:%S")
  1210. else:
  1211. date_str = datetime.datetime.strftime(date_obj, new_format)
  1212. except Exception as e:
  1213. log.error("日期格式化出错,old_format = %s 不符合 %s 格式" % (old_format, date))
  1214. date_str = date
  1215. return date_str
  1216. def transform_lower_num(data_str: str):
  1217. num_map = {
  1218. "一": "1",
  1219. "二": "2",
  1220. "三": "3",
  1221. "四": "4",
  1222. "五": "5",
  1223. "六": "6",
  1224. "七": "7",
  1225. "八": "8",
  1226. "九": "9",
  1227. "十": "0",
  1228. }
  1229. pattern = f'[{"|".join(num_map.keys())}|零]'
  1230. res = re.search(pattern, data_str)
  1231. if not res:
  1232. # 如果字符串中没有包含中文数字 不做处理 直接返回
  1233. return data_str
  1234. data_str = data_str.replace("0", "零")
  1235. for n in num_map:
  1236. data_str = data_str.replace(n, num_map[n])
  1237. re_data_str = re.findall("\d+", data_str)
  1238. for i in re_data_str:
  1239. if len(i) == 3:
  1240. new_i = i.replace("0", "")
  1241. data_str = data_str.replace(i, new_i, 1)
  1242. elif len(i) == 4:
  1243. new_i = i.replace("10", "")
  1244. data_str = data_str.replace(i, new_i, 1)
  1245. elif len(i) == 2 and int(i) < 10:
  1246. new_i = int(i) + 10
  1247. data_str = data_str.replace(i, str(new_i), 1)
  1248. elif len(i) == 1 and int(i) == 0:
  1249. new_i = int(i) + 10
  1250. data_str = data_str.replace(i, str(new_i), 1)
  1251. return data_str.replace("零", "0")
  1252. @run_safe_model("format_time")
  1253. def format_time(release_time, date_format="%Y-%m-%d %H:%M:%S"):
  1254. """
  1255. >>> format_time("2个月前")
  1256. '2021-08-15 16:24:21'
  1257. >>> format_time("2月前")
  1258. '2021-08-15 16:24:36'
  1259. """
  1260. release_time = transform_lower_num(release_time)
  1261. release_time = release_time.replace("日", "天").replace("/", "-")
  1262. if "年前" in release_time:
  1263. years = re.compile("(\d+)\s*年前").findall(release_time)
  1264. years_ago = datetime.datetime.now() - datetime.timedelta(
  1265. days=int(years[0]) * 365
  1266. )
  1267. release_time = years_ago.strftime("%Y-%m-%d %H:%M:%S")
  1268. elif "月前" in release_time:
  1269. months = re.compile("(\d+)[\s个]*月前").findall(release_time)
  1270. months_ago = datetime.datetime.now() - datetime.timedelta(
  1271. days=int(months[0]) * 30
  1272. )
  1273. release_time = months_ago.strftime("%Y-%m-%d %H:%M:%S")
  1274. elif "周前" in release_time:
  1275. weeks = re.compile("(\d+)\s*周前").findall(release_time)
  1276. weeks_ago = datetime.datetime.now() - datetime.timedelta(days=int(weeks[0]) * 7)
  1277. release_time = weeks_ago.strftime("%Y-%m-%d %H:%M:%S")
  1278. elif "天前" in release_time:
  1279. ndays = re.compile("(\d+)\s*天前").findall(release_time)
  1280. days_ago = datetime.datetime.now() - datetime.timedelta(days=int(ndays[0]))
  1281. release_time = days_ago.strftime("%Y-%m-%d %H:%M:%S")
  1282. elif "小时前" in release_time:
  1283. nhours = re.compile("(\d+)\s*小时前").findall(release_time)
  1284. hours_ago = datetime.datetime.now() - datetime.timedelta(hours=int(nhours[0]))
  1285. release_time = hours_ago.strftime("%Y-%m-%d %H:%M:%S")
  1286. elif "分钟前" in release_time:
  1287. nminutes = re.compile("(\d+)\s*分钟前").findall(release_time)
  1288. minutes_ago = datetime.datetime.now() - datetime.timedelta(
  1289. minutes=int(nminutes[0])
  1290. )
  1291. release_time = minutes_ago.strftime("%Y-%m-%d %H:%M:%S")
  1292. elif "前天" in release_time:
  1293. today = datetime.date.today()
  1294. yesterday = today - datetime.timedelta(days=2)
  1295. release_time = release_time.replace("前天", str(yesterday))
  1296. elif "昨天" in release_time:
  1297. today = datetime.date.today()
  1298. yesterday = today - datetime.timedelta(days=1)
  1299. release_time = release_time.replace("昨天", str(yesterday))
  1300. elif "今天" in release_time:
  1301. release_time = release_time.replace("今天", get_current_date("%Y-%m-%d"))
  1302. elif "刚刚" in release_time:
  1303. release_time = get_current_date()
  1304. elif re.search("^\d\d:\d\d", release_time):
  1305. release_time = get_current_date("%Y-%m-%d") + " " + release_time
  1306. elif not re.compile("\d{4}").findall(release_time):
  1307. month = re.compile("\d{1,2}").findall(release_time)
  1308. if month and int(month[0]) <= int(get_current_date("%m")):
  1309. release_time = get_current_date("%Y") + "-" + release_time
  1310. else:
  1311. release_time = str(int(get_current_date("%Y")) - 1) + "-" + release_time
  1312. # 把日和小时粘在一起的拆开
  1313. template = re.compile("(\d{4}-\d{1,2}-\d{2})(\d{1,2})")
  1314. release_time = re.sub(template, r"\1 \2", release_time)
  1315. release_time = format_date(release_time, new_format=date_format)
  1316. return release_time
  1317. def to_date(date_str, date_format="%Y-%m-%d %H:%M:%S"):
  1318. return datetime.datetime.strptime(date_str, date_format)
  1319. def get_before_date(
  1320. current_date,
  1321. days,
  1322. current_date_format="%Y-%m-%d %H:%M:%S",
  1323. return_date_format="%Y-%m-%d %H:%M:%S",
  1324. ):
  1325. """
  1326. @summary: 获取之前时间
  1327. ---------
  1328. @param current_date: 当前时间 str类型
  1329. @param days: 时间间隔 -1 表示前一天 1 表示后一天
  1330. @param days: 返回的时间格式
  1331. ---------
  1332. @result: 字符串
  1333. """
  1334. current_date = to_date(current_date, current_date_format)
  1335. date_obj = current_date + datetime.timedelta(days=days)
  1336. return datetime.datetime.strftime(date_obj, return_date_format)
  1337. def get_utcnow():
  1338. """utc时间"""
  1339. return datetime.datetime.utcnow()
  1340. def delay_time(sleep_time=60):
  1341. """
  1342. @summary: 睡眠 默认1分钟
  1343. ---------
  1344. @param sleep_time: 以秒为单位
  1345. ---------
  1346. @result:
  1347. """
  1348. time.sleep(sleep_time)
  1349. def format_seconds(seconds):
  1350. """
  1351. @summary: 将秒转为时分秒
  1352. ---------
  1353. @param seconds:
  1354. ---------
  1355. @result: 2天3小时2分49秒
  1356. """
  1357. seconds = int(seconds + 0.5) # 向上取整
  1358. m, s = divmod(seconds, 60)
  1359. h, m = divmod(m, 60)
  1360. d, h = divmod(h, 24)
  1361. times = ""
  1362. if d:
  1363. times += "{}天".format(d)
  1364. if h:
  1365. times += "{}小时".format(h)
  1366. if m:
  1367. times += "{}分".format(m)
  1368. if s:
  1369. times += "{}秒".format(s)
  1370. return times
  1371. ################################################
  1372. def get_md5(*args):
  1373. """
  1374. @summary: 获取唯一的32位md5
  1375. ---------
  1376. @param *args: 参与联合去重的值
  1377. ---------
  1378. @result: 7c8684bcbdfcea6697650aa53d7b1405
  1379. """
  1380. m = hashlib.md5()
  1381. for arg in args:
  1382. m.update(str(arg).encode())
  1383. return m.hexdigest()
  1384. def get_sha1(*args):
  1385. """
  1386. @summary: 获取唯一的40位值, 用于获取唯一的id
  1387. ---------
  1388. @param *args: 参与联合去重的值
  1389. ---------
  1390. @result: ba4868b3f277c8e387b55d9e3d0be7c045cdd89e
  1391. """
  1392. sha1 = hashlib.sha1()
  1393. for arg in args:
  1394. sha1.update(str(arg).encode())
  1395. return sha1.hexdigest() # 40位
  1396. def get_sha256(*args):
  1397. """
  1398. @summary: 获取唯一的64位值, 用于获取唯一的id
  1399. ---------
  1400. @param *args: 参与联合去重的值
  1401. ---------
  1402. @result: 5580c91ea29bf5bd963f4c08dfcacd983566e44ecea1735102bc380576fd6f30
  1403. """
  1404. sha256 = hashlib.sha256()
  1405. for arg in args:
  1406. sha256.update(str(arg).encode())
  1407. return sha256.hexdigest() # 64位
  1408. def get_base64(secret, message):
  1409. """
  1410. @summary: 数字证书签名算法是:"HMAC-SHA256"
  1411. 参考:https://www.jokecamp.com/blog/examples-of-creating-base64-hashes-using-hmac-sha256-in-different-languages/
  1412. ---------
  1413. @param secret: 秘钥
  1414. @param message: 消息
  1415. ---------
  1416. @result: 签名输出类型是:"base64"
  1417. """
  1418. import hashlib
  1419. import hmac
  1420. import base64
  1421. message = bytes(message, "utf-8")
  1422. secret = bytes(secret, "utf-8")
  1423. signature = base64.b64encode(
  1424. hmac.new(secret, message, digestmod=hashlib.sha256).digest()
  1425. ).decode("utf8")
  1426. return signature
  1427. def get_uuid(key1="", key2=""):
  1428. """
  1429. @summary: 计算uuid值
  1430. 可用于将两个字符串组成唯一的值。如可将域名和新闻标题组成uuid,形成联合索引
  1431. ---------
  1432. @param key1:str
  1433. @param key2:str
  1434. ---------
  1435. @result:
  1436. """
  1437. if not key1 and not key2:
  1438. uuid_object = uuid.uuid1()
  1439. else:
  1440. hash_ = md5(bytes(key1, "utf-8") + bytes(key2, "utf-8")).digest()
  1441. uuid_object = uuid.UUID(bytes=hash_[:16], version=3)
  1442. return str(uuid_object)
  1443. def get_hash(text):
  1444. return hash(text)
  1445. def decrypt(input_str: str) -> str:
  1446. """
  1447. 改写:新增
  1448. 定义base64解密函数
  1449. :param input_str:
  1450. :return:
  1451. """
  1452. key = "ABNOPqrceQRSTklmUDEFGXYZabnopfghHVWdijstuvwCIJKLMxyz0123456789+/"
  1453. ascii_list = ['{:0>6}'.format(str(bin(key.index(i))).replace('0b', '')) for i in input_str if i != '=']
  1454. output_str = ''
  1455. # 对前面不是“=”的字节取索引,然后转换为2进制
  1456. # 补齐“=”的个数
  1457. equal_num = input_str.count('=')
  1458. while ascii_list:
  1459. temp_list = ascii_list[:4]
  1460. # 转换成2进制字符串
  1461. temp_str = ''.join(temp_list)
  1462. # 对没有8位2进制的字符串补够8位2进制
  1463. if len(temp_str) % 8 != 0:
  1464. temp_str = temp_str[0:-1 * equal_num * 2]
  1465. # 4个6字节的二进制 转换 为三个8字节的二进制
  1466. temp_str_list = [temp_str[x:x + 8] for x in [0, 8, 16]]
  1467. # 二进制转为10进制
  1468. temp_str_list = [int(x, 2) for x in temp_str_list if x]
  1469. # 连接成字符串
  1470. output_str += ''.join([chr(x) for x in temp_str_list])
  1471. ascii_list = ascii_list[4:]
  1472. return output_str
  1473. ##################################################
  1474. def cut_string(text, length):
  1475. """
  1476. @summary: 将文本按指定长度拆分
  1477. ---------
  1478. @param text: 文本
  1479. @param length: 拆分长度
  1480. ---------
  1481. @result: 返回按指定长度拆分后形成的list
  1482. """
  1483. text_list = re.findall(".{%d}" % length, text, re.S)
  1484. leave_text = text[len(text_list) * length :]
  1485. if leave_text:
  1486. text_list.append(leave_text)
  1487. return text_list
  1488. def get_random_string(length=1):
  1489. random_string = "".join(random.sample(string.ascii_letters + string.digits, length))
  1490. return random_string
  1491. def get_random_password(length=8, special_characters=""):
  1492. """
  1493. @summary: 创建随机密码 默认长度为8,包含大写字母、小写字母、数字
  1494. ---------
  1495. @param length: 密码长度 默认8
  1496. @param special_characters: 特殊字符
  1497. ---------
  1498. @result: 指定长度的密码
  1499. """
  1500. while True:
  1501. random_password = "".join(
  1502. random.sample(
  1503. string.ascii_letters + string.digits + special_characters, length
  1504. )
  1505. )
  1506. if (
  1507. re.search("[0-9]", random_password)
  1508. and re.search("[A-Z]", random_password)
  1509. and re.search("[a-z]", random_password)
  1510. ):
  1511. if not special_characters:
  1512. break
  1513. elif set(random_password).intersection(special_characters):
  1514. break
  1515. return random_password
  1516. def get_random_email(length=None, email_types: list = None, special_characters=""):
  1517. """
  1518. 随机生成邮箱
  1519. :param length: 邮箱长度
  1520. :param email_types: 邮箱类型
  1521. :param special_characters: 特殊字符
  1522. :return:
  1523. """
  1524. if not length:
  1525. length = random.randint(4, 12)
  1526. if not email_types:
  1527. email_types = [
  1528. "qq.com",
  1529. "163.com",
  1530. "gmail.com",
  1531. "yahoo.com",
  1532. "hotmail.com",
  1533. "yeah.net",
  1534. "126.com",
  1535. "139.com",
  1536. "sohu.com",
  1537. ]
  1538. email_body = get_random_password(length, special_characters)
  1539. email_type = random.choice(email_types)
  1540. email = email_body + "@" + email_type
  1541. return email
  1542. #################################
  1543. def dumps_obj(obj):
  1544. return pickle.dumps(obj)
  1545. def loads_obj(obj_str):
  1546. return pickle.loads(obj_str)
  1547. def get_method(obj, name):
  1548. name = str(name)
  1549. try:
  1550. return getattr(obj, name)
  1551. except AttributeError:
  1552. log.error("Method %r not found in: %s" % (name, obj))
  1553. return None
  1554. def resolve_method(context, target):
  1555. """
  1556. 解析目标字符串并返回可调用的方法。
  1557. :param context: 上下文,可以是单个对象或上下文字典。
  1558. - 如果是类、函数或模块,直接使用该对象。
  1559. - 如果是类实例,直接使用该对象解析方法。
  1560. - 如果是字典,则按键值解析对象。例如 {'self': instance, 'other': other_instance}
  1561. :param target: 目标字符串,例如 'self.detail_get' 或 'other.some_method'
  1562. :return: 可调用的方法对象
  1563. """
  1564. target = str(target)
  1565. if "." not in target or target.count(".") != 1:
  1566. raise ValueError(
  1567. f"Invalid target format: {target}. "
  1568. f"Expected format: 'object.method'."
  1569. )
  1570. obj_name, method_name = target.split(".", 1)
  1571. # 解析上下文
  1572. if isinstance(context, dict):
  1573. # 如果是字典,按对象名获取对象
  1574. obj = context.get(obj_name)
  1575. if obj is None:
  1576. raise ValueError(f"Object '{obj_name}' not found in context.")
  1577. elif inspect.isclass(context) or inspect.isroutine(context) or inspect.ismodule(context):
  1578. # 如果是类、函数或模块,直接使用该对象
  1579. obj = context
  1580. if obj_name != getattr(obj, "__name__", None) and obj_name != "self":
  1581. raise ValueError(
  1582. f"Unsupported object name: {obj_name}. "
  1583. f"Expected '{getattr(obj, '__name__', None)}' or 'self'."
  1584. )
  1585. elif isinstance(context, object):
  1586. # 如果是类实例,直接使用该对象
  1587. obj = context
  1588. if obj_name != getattr(obj.__class__, "__name__", None) and obj_name != "self":
  1589. raise ValueError(
  1590. f"Unsupported object name: {obj_name}. "
  1591. f"Expected '{getattr(obj.__class__, '__name__', None)}' or 'self'."
  1592. )
  1593. else:
  1594. raise TypeError("Context must be either class, function, module or instance.")
  1595. method = getattr(obj, method_name, None)
  1596. if method is None or not callable(method):
  1597. raise AttributeError(
  1598. f"Method '{method_name}' not found or not callable on object '{obj_name}'."
  1599. )
  1600. return method
  1601. def witch_workspace(project_path):
  1602. """
  1603. @summary:
  1604. ---------
  1605. @param project_path:
  1606. ---------
  1607. @result:
  1608. """
  1609. os.chdir(project_path) # 切换工作路经
  1610. ############### 数据库相关 #######################
  1611. def format_sql_value(value):
  1612. if isinstance(value, str):
  1613. value = value.strip()
  1614. elif isinstance(value, (list, dict)):
  1615. value = dumps_json(value, indent=None)
  1616. elif isinstance(value, (datetime.date, datetime.time)):
  1617. value = str(value)
  1618. elif isinstance(value, bool):
  1619. value = int(value)
  1620. return value
  1621. def list2str(datas):
  1622. """
  1623. 列表转字符串
  1624. :param datas: [1, 2]
  1625. :return: (1, 2)
  1626. """
  1627. data_str = str(tuple(datas))
  1628. data_str = re.sub(",\)$", ")", data_str)
  1629. return data_str
  1630. def make_insert_sql(
  1631. table, data, auto_update=False, update_columns=(), insert_ignore=False
  1632. ):
  1633. """
  1634. @summary: 适用于mysql, oracle数据库时间需要to_date 处理(TODO)
  1635. ---------
  1636. @param table:
  1637. @param data: 表数据 json格式
  1638. @param auto_update: 使用的是replace into, 为完全覆盖已存在的数据
  1639. @param update_columns: 需要更新的列 默认全部,当指定值时,auto_update设置无效,当duplicate key冲突时更新指定的列
  1640. @param insert_ignore: 数据存在忽略
  1641. ---------
  1642. @result:
  1643. """
  1644. keys = ["`{}`".format(key) for key in data.keys()]
  1645. keys = list2str(keys).replace("'", "")
  1646. values = [format_sql_value(value) for value in data.values()]
  1647. values = list2str(values)
  1648. if update_columns:
  1649. if not isinstance(update_columns, (tuple, list)):
  1650. update_columns = [update_columns]
  1651. update_columns_ = ", ".join(
  1652. ["{key}=values({key})".format(key=key) for key in update_columns]
  1653. )
  1654. sql = (
  1655. "insert%s into `{table}` {keys} values {values} on duplicate key update %s"
  1656. % (" ignore" if insert_ignore else "", update_columns_)
  1657. )
  1658. elif auto_update:
  1659. sql = "replace into `{table}` {keys} values {values}"
  1660. else:
  1661. sql = "insert%s into `{table}` {keys} values {values}" % (
  1662. " ignore" if insert_ignore else ""
  1663. )
  1664. sql = sql.format(table=table, keys=keys, values=values).replace("None", "null")
  1665. return sql
  1666. def make_update_sql(table, data, condition):
  1667. """
  1668. @summary: 适用于mysql, oracle数据库时间需要to_date 处理(TODO)
  1669. ---------
  1670. @param table:
  1671. @param data: 表数据 json格式
  1672. @param condition: where 条件
  1673. ---------
  1674. @result:
  1675. """
  1676. key_values = []
  1677. for key, value in data.items():
  1678. value = format_sql_value(value)
  1679. if isinstance(value, str):
  1680. key_values.append("`{}`={}".format(key, repr(value)))
  1681. elif value is None:
  1682. key_values.append("`{}`={}".format(key, "null"))
  1683. else:
  1684. key_values.append("`{}`={}".format(key, value))
  1685. key_values = ", ".join(key_values)
  1686. sql = "update `{table}` set {key_values} where {condition}"
  1687. sql = sql.format(table=table, key_values=key_values, condition=condition)
  1688. return sql
  1689. def make_batch_sql(
  1690. table, datas, auto_update=False, update_columns=(), update_columns_value=()
  1691. ):
  1692. """
  1693. @summary: 生产批量的sql
  1694. ---------
  1695. @param table:
  1696. @param datas: 表数据 [{...}]
  1697. @param auto_update: 使用的是replace into, 为完全覆盖已存在的数据
  1698. @param update_columns: 需要更新的列 默认全部,当指定值时,auto_update设置无效,当duplicate key冲突时更新指定的列
  1699. @param update_columns_value: 需要更新的列的值 默认为datas里边对应的值, 注意 如果值为字符串类型 需要主动加单引号, 如 update_columns_value=("'test'",)
  1700. ---------
  1701. @result:
  1702. """
  1703. if not datas:
  1704. return
  1705. keys = list(datas[0].keys())
  1706. values_placeholder = ["%s"] * len(keys)
  1707. values = []
  1708. for data in datas:
  1709. value = []
  1710. for key in keys:
  1711. current_data = data.get(key)
  1712. current_data = format_sql_value(current_data)
  1713. value.append(current_data)
  1714. values.append(value)
  1715. keys = ["`{}`".format(key) for key in keys]
  1716. keys = list2str(keys).replace("'", "")
  1717. values_placeholder = list2str(values_placeholder).replace("'", "")
  1718. if update_columns:
  1719. if not isinstance(update_columns, (tuple, list)):
  1720. update_columns = [update_columns]
  1721. if update_columns_value:
  1722. update_columns_ = ", ".join(
  1723. [
  1724. "`{key}`={value}".format(key=key, value=value)
  1725. for key, value in zip(update_columns, update_columns_value)
  1726. ]
  1727. )
  1728. else:
  1729. update_columns_ = ", ".join(
  1730. ["`{key}`=values(`{key}`)".format(key=key) for key in update_columns]
  1731. )
  1732. sql = "insert into `{table}` {keys} values {values_placeholder} on duplicate key update {update_columns}".format(
  1733. table=table,
  1734. keys=keys,
  1735. values_placeholder=values_placeholder,
  1736. update_columns=update_columns_,
  1737. )
  1738. elif auto_update:
  1739. sql = "replace into `{table}` {keys} values {values_placeholder}".format(
  1740. table=table, keys=keys, values_placeholder=values_placeholder
  1741. )
  1742. else:
  1743. sql = "insert ignore into `{table}` {keys} values {values_placeholder}".format(
  1744. table=table, keys=keys, values_placeholder=values_placeholder
  1745. )
  1746. return sql, values
  1747. ############### json相关 #######################
  1748. def key2underline(key: str, strict=True):
  1749. """
  1750. >>> key2underline("HelloWord")
  1751. 'hello_word'
  1752. >>> key2underline("SHData", strict=True)
  1753. 's_h_data'
  1754. >>> key2underline("SHData", strict=False)
  1755. 'sh_data'
  1756. >>> key2underline("SHDataHi", strict=False)
  1757. 'sh_data_hi'
  1758. >>> key2underline("SHDataHi", strict=True)
  1759. 's_h_data_hi'
  1760. >>> key2underline("dataHi", strict=True)
  1761. 'data_hi'
  1762. """
  1763. regex = "[A-Z]*" if not strict else "[A-Z]"
  1764. capitals = re.findall(regex, key)
  1765. if capitals:
  1766. for capital in capitals:
  1767. if not capital:
  1768. continue
  1769. if key.startswith(capital):
  1770. if len(capital) > 1:
  1771. key = key.replace(
  1772. capital, capital[:-1].lower() + "_" + capital[-1].lower(), 1
  1773. )
  1774. else:
  1775. key = key.replace(capital, capital.lower(), 1)
  1776. else:
  1777. if len(capital) > 1:
  1778. key = key.replace(capital, "_" + capital.lower() + "_", 1)
  1779. else:
  1780. key = key.replace(capital, "_" + capital.lower(), 1)
  1781. return key.strip("_")
  1782. def key2hump(key):
  1783. """
  1784. 下划线试变成首字母大写
  1785. """
  1786. return key.title().replace("_", "")
  1787. def format_json_key(json_data):
  1788. json_data_correct = {}
  1789. for key, value in json_data.items():
  1790. key = key2underline(key)
  1791. json_data_correct[key] = value
  1792. return json_data_correct
  1793. def quick_to_json(text):
  1794. """
  1795. @summary: 可快速将浏览器上的header转为json格式
  1796. ---------
  1797. @param text:
  1798. ---------
  1799. @result:
  1800. """
  1801. contents = text.split("\n")
  1802. json = {}
  1803. for content in contents:
  1804. if content == "\n":
  1805. continue
  1806. content = content.strip()
  1807. regex = ["(:?.*?):(.*)", "(.*?):? +(.*)", "([^:]*)"]
  1808. result = get_info(content, regex)
  1809. result = result[0] if isinstance(result[0], tuple) else result
  1810. try:
  1811. json[result[0]] = eval(result[1].strip())
  1812. except:
  1813. json[result[0]] = result[1].strip()
  1814. return json
  1815. ##############################
  1816. def print_pretty(object):
  1817. pprint(object)
  1818. def print_params2json(url):
  1819. params_json = {}
  1820. params = url.split("?")[-1].split("&")
  1821. for param in params:
  1822. key_value = param.split("=", 1)
  1823. params_json[key_value[0]] = key_value[1]
  1824. print(dumps_json(params_json))
  1825. def print_cookie2json(cookie_str_or_list):
  1826. if isinstance(cookie_str_or_list, str):
  1827. cookie_json = {}
  1828. cookies = cookie_str_or_list.split("; ")
  1829. for cookie in cookies:
  1830. name, value = cookie.split("=")
  1831. cookie_json[name] = value
  1832. else:
  1833. cookie_json = get_cookies_from_selenium_cookie(cookie_str_or_list)
  1834. print(dumps_json(cookie_json))
  1835. ###############################
  1836. def flatten(x):
  1837. """flatten(sequence) -> list
  1838. Returns a single, flat list which contains all elements retrieved
  1839. from the sequence and all recursively contained sub-sequences
  1840. (iterables).
  1841. Examples:
  1842. >>> [1, 2, [3,4], (5,6)]
  1843. [1, 2, [3, 4], (5, 6)]
  1844. >>> flatten([[[1,2,3], (42,None)], [4,5], [6], 7, (8,9,10)])
  1845. [1, 2, 3, 42, None, 4, 5, 6, 7, 8, 9, 10]
  1846. >>> flatten(["foo", "bar"])
  1847. ['foo', 'bar']
  1848. >>> flatten(["foo", ["baz", 42], "bar"])
  1849. ['foo', 'baz', 42, 'bar']
  1850. """
  1851. return list(iflatten(x))
  1852. def iflatten(x):
  1853. """iflatten(sequence) -> iterator
  1854. Similar to ``.flatten()``, but returns iterator instead"""
  1855. for el in x:
  1856. if _is_listlike(el):
  1857. for el_ in flatten(el):
  1858. yield el_
  1859. else:
  1860. yield el
  1861. def _is_listlike(x):
  1862. """
  1863. >>> _is_listlike("foo")
  1864. False
  1865. >>> _is_listlike(5)
  1866. False
  1867. >>> _is_listlike(b"foo")
  1868. False
  1869. >>> _is_listlike([b"foo"])
  1870. True
  1871. >>> _is_listlike((b"foo",))
  1872. True
  1873. >>> _is_listlike({})
  1874. True
  1875. >>> _is_listlike(set())
  1876. True
  1877. >>> _is_listlike((x for x in range(3)))
  1878. True
  1879. >>> _is_listlike(six.moves.xrange(5))
  1880. True
  1881. """
  1882. return hasattr(x, "__iter__") and not isinstance(x, (six.text_type, bytes))
  1883. ###################
  1884. def re_def_supper_class(obj, supper_class):
  1885. """
  1886. 重新定义父类
  1887. @param obj: 类 如 class A: 则obj为A 或者 A的实例 a.__class__
  1888. @param supper_class: 父类
  1889. @return:
  1890. """
  1891. obj.__bases__ = (supper_class,)
  1892. ###################
  1893. freq_limit_record = {}
  1894. def reach_freq_limit(rate_limit, *key):
  1895. """
  1896. 频率限制
  1897. :param rate_limit: 限制时间 单位秒
  1898. :param key: 频率限制的key
  1899. :return: True / False
  1900. """
  1901. if rate_limit == 0:
  1902. return False
  1903. msg_md5 = get_md5(*key)
  1904. key = "rate_limit:{}".format(msg_md5)
  1905. try:
  1906. if get_redisdb().get(key):
  1907. return True
  1908. get_redisdb().set(key, time.time(), ex=rate_limit)
  1909. except redis.exceptions.ConnectionError as e:
  1910. # 使用内存做频率限制
  1911. global freq_limit_record
  1912. if key not in freq_limit_record:
  1913. freq_limit_record[key] = time.time()
  1914. return False
  1915. if time.time() - freq_limit_record.get(key) < rate_limit:
  1916. return True
  1917. else:
  1918. freq_limit_record[key] = time.time()
  1919. return False
  1920. def dingding_warning(
  1921. message, message_prefix=None, rate_limit=None, url=None, user_phone=None
  1922. ):
  1923. # 为了加载最新的配置
  1924. rate_limit = rate_limit if rate_limit is not None else setting.WARNING_INTERVAL
  1925. url = url or setting.DINGDING_WARNING_URL
  1926. user_phone = user_phone or setting.DINGDING_WARNING_PHONE
  1927. if not all([url, message]):
  1928. return
  1929. if reach_freq_limit(rate_limit, url, user_phone, message_prefix or message):
  1930. log.info("报警时间间隔过短,此次报警忽略。 内容 {}".format(message))
  1931. return
  1932. if isinstance(user_phone, str):
  1933. user_phone = [user_phone] if user_phone else []
  1934. data = {
  1935. "msgtype": "text",
  1936. "text": {"content": message},
  1937. "at": {"atMobiles": user_phone, "isAtAll": setting.DINGDING_WARNING_ALL},
  1938. }
  1939. headers = {"Content-Type": "application/json"}
  1940. try:
  1941. response = requests.post(
  1942. url, headers=headers, data=json.dumps(data).encode("utf8")
  1943. )
  1944. result = response.json()
  1945. response.close()
  1946. if result.get("errcode") == 0:
  1947. return True
  1948. else:
  1949. raise Exception(result.get("errmsg"))
  1950. except Exception as e:
  1951. log.error("报警发送失败。 报警内容 {}, error: {}".format(message, e))
  1952. return False
  1953. def email_warning(
  1954. message,
  1955. title,
  1956. message_prefix=None,
  1957. email_sender=None,
  1958. email_password=None,
  1959. email_receiver=None,
  1960. email_smtpserver=None,
  1961. rate_limit=None,
  1962. ):
  1963. # 为了加载最新的配置
  1964. email_sender = email_sender or setting.EMAIL_SENDER
  1965. email_password = email_password or setting.EMAIL_PASSWORD
  1966. email_receiver = email_receiver or setting.EMAIL_RECEIVER
  1967. email_smtpserver = email_smtpserver or setting.EMAIL_SMTPSERVER
  1968. rate_limit = rate_limit if rate_limit is not None else setting.WARNING_INTERVAL
  1969. if not all([message, email_sender, email_password, email_receiver]):
  1970. return
  1971. if reach_freq_limit(
  1972. rate_limit, email_receiver, email_sender, message_prefix or message
  1973. ):
  1974. log.info("报警时间间隔过短,此次报警忽略。 内容 {}".format(message))
  1975. return
  1976. if isinstance(email_receiver, str):
  1977. email_receiver = [email_receiver]
  1978. with EmailSender(
  1979. username=email_sender, password=email_password, smtpserver=email_smtpserver
  1980. ) as email:
  1981. return email.send(receivers=email_receiver, title=title, content=message)
  1982. def linkedsee_warning(message, rate_limit=3600, message_prefix=None, token=None):
  1983. """
  1984. 灵犀电话报警
  1985. Args:
  1986. message:
  1987. rate_limit:
  1988. message_prefix:
  1989. token:
  1990. Returns:
  1991. """
  1992. if not token:
  1993. log.info("未设置灵犀token,不支持报警")
  1994. return
  1995. if reach_freq_limit(rate_limit, token, message_prefix or message):
  1996. log.info("报警时间间隔过短,此次报警忽略。 内容 {}".format(message))
  1997. return
  1998. headers = {"servicetoken": token, "Content-Type": "application/json"}
  1999. url = "http://www.linkedsee.com/alarm/zabbix"
  2000. data = {"content": message}
  2001. response = requests.post(url, data=json.dumps(data), headers=headers)
  2002. return response
  2003. def wechat_warning(
  2004. message,
  2005. message_prefix=None,
  2006. rate_limit=None,
  2007. url=None,
  2008. user_phone=None,
  2009. all_users: bool = None,
  2010. ):
  2011. """企业微信报警"""
  2012. # 为了加载最新的配置
  2013. rate_limit = rate_limit if rate_limit is not None else setting.WARNING_INTERVAL
  2014. url = url or setting.WECHAT_WARNING_URL
  2015. user_phone = user_phone or setting.WECHAT_WARNING_PHONE
  2016. all_users = all_users if all_users is not None else setting.WECHAT_WARNING_ALL
  2017. if isinstance(user_phone, str):
  2018. user_phone = [user_phone] if user_phone else []
  2019. if all_users is True or not user_phone:
  2020. user_phone = ["@all"]
  2021. if not all([url, message]):
  2022. return
  2023. if reach_freq_limit(rate_limit, url, user_phone, message_prefix or message):
  2024. log.info("报警时间间隔过短,此次报警忽略。 内容 {}".format(message))
  2025. return
  2026. data = {
  2027. "msgtype": "text",
  2028. "text": {"content": message, "mentioned_mobile_list": user_phone},
  2029. }
  2030. headers = {"Content-Type": "application/json"}
  2031. try:
  2032. response = requests.post(
  2033. url, headers=headers, data=json.dumps(data).encode("utf8")
  2034. )
  2035. result = response.json()
  2036. response.close()
  2037. if result.get("errcode") == 0:
  2038. return True
  2039. else:
  2040. raise Exception(result.get("errmsg"))
  2041. except Exception as e:
  2042. log.error("报警发送失败。 报警内容 {}, error: {}".format(message, e))
  2043. return False
  2044. def send_msg(msg, level="debug", message_prefix=""):
  2045. if setting.WARNING_LEVEL == "ERROR":
  2046. if level != "error":
  2047. return
  2048. if setting.DINGDING_WARNING_URL:
  2049. keyword = "feapder报警系统\n"
  2050. dingding_warning(keyword + msg, message_prefix=message_prefix)
  2051. if setting.EMAIL_RECEIVER:
  2052. title = message_prefix or msg
  2053. if len(title) > 50:
  2054. title = title[:50] + "..."
  2055. email_warning(msg, message_prefix=message_prefix, title=title)
  2056. if setting.WECHAT_WARNING_URL:
  2057. keyword = "feapder报警系统\n"
  2058. wechat_warning(keyword + msg, message_prefix=message_prefix)
  2059. ###################
  2060. def make_item(cls, data: dict):
  2061. """提供Item类与原数据,快速构建Item实例
  2062. :param cls: Item类
  2063. :param data: 字典格式的数据
  2064. """
  2065. item = cls()
  2066. for key, val in data.items():
  2067. setattr(item, key, val)
  2068. return item
  2069. ###################
  2070. def aio_wrap(loop=None, executor=None):
  2071. """
  2072. wrap a normal sync version of a function to an async version
  2073. """
  2074. outer_loop = loop
  2075. outer_executor = executor
  2076. def wrap(fn):
  2077. @wraps(fn)
  2078. async def run(*args, loop=None, executor=None, **kwargs):
  2079. if loop is None:
  2080. if outer_loop is None:
  2081. loop = asyncio.get_event_loop()
  2082. else:
  2083. loop = outer_loop
  2084. if executor is None:
  2085. executor = outer_executor
  2086. pfunc = partial(fn, *args, **kwargs)
  2087. return await loop.run_in_executor(executor, pfunc)
  2088. return run
  2089. return wrap
  2090. ######### number ##########
  2091. def ensure_int(n):
  2092. """
  2093. >>> ensure_int(None)
  2094. 0
  2095. >>> ensure_int(False)
  2096. 0
  2097. >>> ensure_int(12)
  2098. 12
  2099. >>> ensure_int("72")
  2100. 72
  2101. >>> ensure_int('')
  2102. 0
  2103. >>> ensure_int('1')
  2104. 1
  2105. """
  2106. if not n:
  2107. return 0
  2108. return int(n)
  2109. def ensure_float(n):
  2110. """
  2111. >>> ensure_float(None)
  2112. 0.0
  2113. >>> ensure_float(False)
  2114. 0.0
  2115. >>> ensure_float(12)
  2116. 12.0
  2117. >>> ensure_float("72")
  2118. 72.0
  2119. """
  2120. if not n:
  2121. return 0.0
  2122. return float(n)
  2123. def ensure_int64(n):
  2124. """
  2125. >>> ensure_int64(None)
  2126. 0
  2127. >>> ensure_float(False)
  2128. 0
  2129. >>> ensure_float(12)
  2130. 12
  2131. >>> ensure_float("72")
  2132. 72
  2133. """
  2134. if not n:
  2135. return bson.int64.Int64(0)
  2136. return bson.int64.Int64(n)
  2137. def import_cls(cls_info):
  2138. module, class_name = cls_info.rsplit(".", 1)
  2139. cls = importlib.import_module(module).__getattribute__(class_name)
  2140. return cls
  2141. def load_globals(*module_name):
  2142. global_dict = globals()
  2143. module_vars = dir(module_name)
  2144. for var_name in module_vars:
  2145. if not var_name.startswith('__'):
  2146. var_value = getattr(module_name, var_name)
  2147. global_dict[var_name] = var_value
  2148. return global_dict