tools.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. import datetime
  2. import hashlib
  3. import re
  4. import time
  5. from collections import namedtuple
  6. from lxml.html import HtmlElement, fromstring, tostring
  7. SearchText = namedtuple('SearchText', ['total'])
  8. def element2html(element: HtmlElement) -> str:
  9. return tostring(element, encoding="utf-8").decode()
  10. def html2element(html: str) -> HtmlElement:
  11. return fromstring(html)
  12. def valid_element(node: HtmlElement, feature: str):
  13. if len(node.xpath(feature)) > 0:
  14. return True
  15. else:
  16. return False
  17. def remove_node(node: HtmlElement):
  18. """
  19. this is a in-place operation, not necessary to return
  20. :param node:
  21. :return:
  22. """
  23. parent = node.getparent()
  24. if parent is not None:
  25. parent.remove(node)
  26. def verify_text(val: str):
  27. """检查数字、字母、中文的个数"""
  28. if val is None:
  29. return False
  30. sub_pattern = ['<[^>]+>', '[^0-9a-zA-Z\u4e00-\u9fa5]+']
  31. for pattern in sub_pattern:
  32. val = re.sub(pattern, '', val)
  33. # 若文本长度小于50,表示页面内容无详情内容
  34. if len(val) < 50:
  35. '''无效文本'''
  36. return False
  37. '''有效文本'''
  38. return True
  39. def sha1(text: str):
  40. """
  41. 十六进制数字字符串形式摘要值
  42. @param text: 字符串文本
  43. @return: 摘要值
  44. """
  45. _sha1 = hashlib.sha1()
  46. _sha1.update(text.encode("utf-8"))
  47. return _sha1.hexdigest()
  48. def get_ms() -> int:
  49. return int(round(time.time() * 1000))
  50. def get_current_date():
  51. return datetime.datetime.now().strftime("%Y-%m-%d")
  52. def ms2date(ms: int, fmt="%Y-%m-%d %H:%M:%S"):
  53. """毫秒转日期"""
  54. timestamp = float(ms / 1000)
  55. time_array = time.localtime(timestamp)
  56. return time.strftime(fmt, time_array)
  57. def convert2type(ts_str):
  58. """字符串类型时间戳转成整型"""
  59. return int(float(ts_str) / 1000)
  60. def ts2date(ts_str, fmt="%Y-%m-%d %H:%M:%S") -> str:
  61. """
  62. 时间戳转成日期
  63. :param ts_str: 毫秒级时间戳
  64. :param fmt: 日期格式
  65. :return: 日期
  66. """
  67. timestamp = int(float(ts_str) / 1000)
  68. time_array = time.localtime(timestamp)
  69. return time.strftime(fmt, time_array)
  70. def date2ts(date_str: str, fmt="%Y-%m-%d"):
  71. """日期转成时间戳"""
  72. time_array = time.strptime(date_str, fmt)
  73. timestamp = int(time.mktime(time_array))
  74. return timestamp
  75. def delay_by_hour(hour, fmt="%Y-%m-%d %H:%M:%S"):
  76. """按小时延时"""
  77. _hour = int(hour)
  78. _current_now = datetime.datetime.now()
  79. return (_current_now + datetime.timedelta(hours=_hour)).strftime(fmt)
  80. def delay_by_minutes(minutes, fmt="%Y-%m-%d %H:%M:%S"):
  81. """按分钟延时"""
  82. _minutes = int(minutes)
  83. _current_now = datetime.datetime.now()
  84. return (_current_now + datetime.timedelta(minutes=_minutes)).strftime(fmt)
  85. def delay_by_day(days, fmt="%Y-%m-%d %H:%M:%S"):
  86. """按天延时"""
  87. _days = int(days)
  88. _current_now = datetime.datetime.now()
  89. return (_current_now + datetime.timedelta(days=_days)).strftime(fmt)