# Scheduled export of important sites and spiders from the production environment.
from pymongo import MongoClient

from util.mysql_tool import MysqlUtil

# Initialize the MySQL connection.
conn = MysqlUtil.connect_to_mysql(
    host='192.168.3.14',
    port='4000',
    user='DataScBi',
    password='DT#Sc20221123Ht',
    database='quality',
)


def export_site():
    # Export important sites from the "editor.site" collection into site_dict.
    collection = MongoClient(
        'mongodb://127.0.0.1:27089/',
        unicode_decode_error_handler="ignore",
        directConnection=True,
    )["editor"]["site"]
    for info in collection.find({"important": 1}).sort("_id", 1):
        site = info.get("site", '')
        domain = info.get("domain", '')
        query = """INSERT IGNORE INTO site_dict (site, domain) VALUES (%s, %s)"""
        params = (site, domain)
        MysqlUtil.insert_data(conn, query, params)


def export_spider():
    # Export important spiders from the "editor.luaconfig" collection into spider_dict.
    collection_lua = MongoClient(
        'mongodb://127.0.0.1:27089/',
        unicode_decode_error_handler="ignore",
        directConnection=True,
    )["editor"]["luaconfig"]
    for info in collection_lua.find({"spiderimportant": True}).sort("_id", 1):
        spider = info.get("code", "")
        # Skip this record if the spider code is empty.
        if not spider:
            continue
        query = """INSERT IGNORE INTO spider_dict (spider) VALUES (%s)"""
        params = (spider,)
        MysqlUtil.insert_data(conn, query, params)


if __name__ == "__main__":
    # Run both exports; the original script only invoked export_spider().
    export_site()
    export_spider()
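
# --- Sketch of the assumed helper module (util/mysql_tool.py) ---------------
# util/mysql_tool.py is not shown here, so the interface used above
# (connect_to_mysql / insert_data) is an assumption. The minimal sketch below
# illustrates one way such a helper could look, assuming a pymysql backend;
# the real implementation may differ (driver, pooling, error handling).

import pymysql


class MysqlUtilSketch:
    """Hypothetical stand-in for util.mysql_tool.MysqlUtil (illustration only)."""

    @staticmethod
    def connect_to_mysql(host, port, user, password, database):
        # The caller passes port as a string, so cast it to int for the driver.
        return pymysql.connect(
            host=host,
            port=int(port),
            user=user,
            password=password,
            database=database,
            charset="utf8mb4",
            autocommit=True,
        )

    @staticmethod
    def insert_data(conn, query, params):
        # Execute a parameterized INSERT; INSERT IGNORE makes duplicates a no-op.
        with conn.cursor() as cursor:
            cursor.execute(query, params)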