export_site.py

# Periodically export the important sites and spiders from the
# production environment into MySQL.
from pymongo import MongoClient

from util.mysql_tool import MysqlUtil

# Initialize the MySQL connection
conn = MysqlUtil.connect_to_mysql(host='192.168.3.14', port='4000', user='DataScBi',
                                  password='DT#Sc20221123Ht', database='quality')


def export_site():
    # Export every site flagged as important from MongoDB into site_dict
    collection = MongoClient('mongodb://127.0.0.1:27089/',
                             unicode_decode_error_handler="ignore",
                             directConnection=True)["editor"]["site"]
    for info in collection.find({"important": 1}).sort("_id", 1):
        site = info.get("site", '')
        domain = info.get("domain", '')
        query = """INSERT IGNORE INTO site_dict (site, domain) VALUES (%s, %s)"""
        params = (site, domain)
        MysqlUtil.insert_data(conn, query, params)


def export_spider():
    # Export every spider (Lua config) flagged as important into spider_dict
    collection_lua = MongoClient('mongodb://127.0.0.1:27089/',
                                 unicode_decode_error_handler="ignore",
                                 directConnection=True)["editor"]["luaconfig"]
    for info in collection_lua.find({"spiderimportant": True}).sort("_id", 1):
        spider = info.get("code", "")
        # Skip records whose spider code is empty
        if not spider:
            continue
        query = """INSERT IGNORE INTO spider_dict (spider) VALUES (%s)"""
        params = (spider,)
        MysqlUtil.insert_data(conn, query, params)


if __name__ == "__main__":
    export_site()
    export_spider()
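
The script imports MysqlUtil from util.mysql_tool, which is not included in this file. A minimal sketch of what that helper could look like, assuming pymysql underneath; connect_to_mysql and insert_data keep the signatures used above, but the real helper may differ:

import pymysql

class MysqlUtil:
    @staticmethod
    def connect_to_mysql(host, port, user, password, database):
        # The caller passes port as a string, so coerce it here;
        # autocommit persists each INSERT without explicit commits.
        return pymysql.connect(host=host, port=int(port), user=user,
                               password=password, database=database,
                               charset="utf8mb4", autocommit=True)

    @staticmethod
    def insert_data(conn, query, params):
        # Run a single parameterized INSERT on a short-lived cursor
        with conn.cursor() as cursor:
            cursor.execute(query, params)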
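
INSERT IGNORE silently drops duplicates only when the target table carries a primary or unique key, so site_dict and spider_dict presumably have one. A hypothetical schema sketch, executed over the same connection (assuming conn is the pymysql connection from the sketch above); the column types and key names are assumptions, not the production DDL:

# Hypothetical DDL for the two target tables; the real schemas in the
# quality database may differ. The unique keys are what makes
# INSERT IGNORE deduplicate on repeated runs.
DDL_STATEMENTS = [
    """CREATE TABLE IF NOT EXISTS site_dict (
        id     BIGINT AUTO_INCREMENT PRIMARY KEY,
        site   VARCHAR(255) NOT NULL,
        domain VARCHAR(255) NOT NULL,
        UNIQUE KEY uk_site_domain (site, domain)
    )""",
    """CREATE TABLE IF NOT EXISTS spider_dict (
        id     BIGINT AUTO_INCREMENT PRIMARY KEY,
        spider TEXT NOT NULL,
        UNIQUE KEY uk_spider (spider(255))
    )""",
]

with conn.cursor() as cursor:
    for statement in DDL_STATEMENTS:
        cursor.execute(statement)

Since the header comment describes a scheduled export, the script is presumably invoked from cron or a similar scheduler rather than running its own loop.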