```python
# Scheduled job: export the important sites and spiders from the production environment.
from pymongo import MongoClient

from util.mysql_tool import MysqlUtil

# Initialize the MySQL connection.
conn = MysqlUtil.connect_to_mysql(host='192.168.3.14', port='4000', user='DataScBi',
                                  password='DT#Sc20221123Ht', database='quality')

# Initialize the MongoDB client (direct connection to the production instance).
mongo = MongoClient('mongodb://127.0.0.1:27089/',
                    unicode_decode_error_handler='ignore', directConnection=True)


def export_site():
    """Export important sites (site + domain) into the site_dict table."""
    collection = mongo['editor']['site']
    for info in collection.find({'important': 1}).sort('_id', 1):
        site = info.get('site', '')
        domain = info.get('domain', '')
        query = """INSERT IGNORE INTO site_dict (site, domain) VALUES (%s, %s)"""
        params = (site, domain)
        MysqlUtil.insert_data(conn, query, params)


def export_spider():
    """Export important spider (Lua) configs into the spider_dict table."""
    collection_lua = mongo['editor']['luaconfig']
    for info in collection_lua.find({'spiderimportant': True}).sort('_id', 1):
        spider = info.get('code', '')
        # Skip this record if it has no spider code.
        if not spider:
            continue
        query = """INSERT IGNORE INTO spider_dict (spider) VALUES (%s)"""
        params = (spider,)
        MysqlUtil.insert_data(conn, query, params)


# Run both exports; the original listing defined export_site() but only invoked
# export_spider(), even though the header comment says both should be exported.
export_site()
export_spider()
```
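The script depends on a project-local helper, `util.mysql_tool.MysqlUtil`, whose source is not shown. Below is a minimal sketch of what that helper might look like, assuming it is built on `pymysql` and matches the two call signatures used above; the real implementation may differ.

```python
# Hypothetical sketch of util/mysql_tool.py -- the actual helper is not shown
# in the source. Assumes pymysql and the signatures used by the export script.
import pymysql


class MysqlUtil:
    @staticmethod
    def connect_to_mysql(host, port, user, password, database):
        # The export script passes port as a string; pymysql expects an int.
        return pymysql.connect(host=host, port=int(port), user=user,
                               password=password, database=database,
                               charset='utf8mb4')

    @staticmethod
    def insert_data(conn, query, params):
        # Execute one parameterized INSERT and commit; roll back on failure.
        try:
            with conn.cursor() as cursor:
                cursor.execute(query, params)
            conn.commit()
        except Exception:
            conn.rollback()
            raise
```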
|
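The header comment calls this a scheduled job, but the source does not show how it is triggered. One common option, assuming the script is saved as `export_important.py` (a hypothetical filename and path), is a daily cron entry:

```
# Run the export every day at 02:00 (hypothetical path)
0 2 * * * /usr/bin/python3 /opt/jobs/export_important.py
```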