123456789101112131415161718192021222324252627282930313233343536373839404142 |
- import hashlib
- import socket
- import re
def sha1(text: str):
    """
    Return the SHA-1 digest of a string as a hexadecimal string.

    The text is encoded as UTF-8 before it is hashed.

    @param text: text to hash
    @return: hexadecimal digest string
    """
    encoded = text.encode("utf-8")
    return hashlib.sha1(encoded).hexdigest()
def get_host_ip():
    """
    Return the local address of an IPv4 UDP socket connected to 8.8.8.8:80.

    The value is the first element of ``getsockname()``, i.e. the local IP
    the operating system bound to the socket for that destination. Any
    error raised by ``connect`` propagates to the caller; the socket is
    closed in every case.

    @return: local IP address as reported by the socket
    """
    # The context manager closes the socket on both success and failure.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
        probe.connect(('8.8.8.8', 80))
        return probe.getsockname()[0]
# Keywords whose presence in a title makes check_crawl_title report a match.
_CRAWL_KEYWORDS = (
    '招标', '流标', '评标', '询价', '中标候选人', '抽签', '谈判', '中选', '意见征询',
    '更正公告', '废标', '补遗', '议价', '邀请', '资格预审', '竞标', '变更', '遴选',
    '磋商', '项目', '评审', '询比', '开标', '澄清', '比选', '中止', '采购', '竟价',
    '招投标', '拟建', '成交', '中标', '竞争性谈判', '工程', '验收公告', '更正',
    '单一来源', '变更公告', '合同', '违规', '评判', '监理', '竞价', '答疑',
    '终止', '系统',
)

# Single alternation compiled once at import time. Longer keywords are listed
# first so that, at a given position, '中标候选人' is preferred over its prefix
# '中标'; the secondary sort key keeps the pattern text itself deterministic.
_CRAWL_KEYWORD_PATTERN = re.compile(
    "|".join(
        re.escape(keyword)
        for keyword in sorted(_CRAWL_KEYWORDS, key=lambda kw: (-len(kw), kw))
    )
)


def check_crawl_title(title: str) -> "re.Match[str] | None":
    """
    Search a title for any of the crawl keywords.

    The leftmost keyword occurrence in the title is returned; when several
    keywords start at that position the longest one wins. The result is
    therefore the same on every run, unlike iterating a set of strings,
    whose order varies with hash randomisation.

    @param title: title text to inspect; a falsy value (``None`` or ``""``)
        is treated as "no keyword found"
    @return: the ``re.Match`` object for the keyword that was found, or
        ``None`` when the title contains none of the keywords
    """
    if not title:
        return None
    return _CRAWL_KEYWORD_PATTERN.search(title)
|