# -*- coding: utf-8 -*-
"""
Created on 2024-10-19
---------
@summary: Copy every document of ``zjb_poc.qlm_data_lst`` into
    ``zjb_poc.jy_data_lst``, remapping the source fields to the target
    record layout and inserting in fixed-size batches.
---------
@author: Dzr
"""
import time
from datetime import datetime

import bson
from pymongo import MongoClient

Int64 = bson.int64.Int64

MONGO_HOST = '192.168.3.182'
MONGO_PORT = 27017
DB_NAME = 'zjb_poc'
SOURCE_COLLECTION = 'qlm_data_lst'
TARGET_COLLECTION = 'jy_data_lst'
BATCH_SIZE = 100  # number of records sent per insert_many call


def build_record(item):
    """Map one source document to the target record layout.

    Args:
        item: A document read from the source collection. The keys
            ``url``, ``updateTime``, ``channel`` and ``tenderees`` are
            required, as is ``showTitle`` when ``popTitle`` is absent.

    Returns:
        A new dict ready to be inserted into the target collection.

    Raises:
        KeyError: If one of the required keys is missing.
        ValueError: If ``updateTime`` is not formatted as ``%Y-%m-%d``.
    """
    title = item['popTitle'] if 'popTitle' in item else item['showTitle']

    publishtime = item['updateTime']
    # A naive datetime is interpreted in the machine's local timezone by
    # timestamp().
    publish_ts = datetime.strptime(publishtime, '%Y-%m-%d').timestamp()

    # Normalise a missing/None area to '' so that the literal string
    # 'None' is never stored as the area.
    area_parts = str(item.get('areaName') or '').split('-')
    area = area_parts[0]  # split() always yields at least one element
    city = area_parts[1] if len(area_parts) > 1 else ''

    # progName / noticeSegmentTypeName may be absent or None; treat both
    # cases as empty instead of raising TypeError / KeyError.
    prog_name = item.get('progName') or ''
    if '国土' in prog_name:
        toptype = prog_name
    else:
        toptype = item.get('noticeSegmentTypeName') or prog_name

    bidder = item.get('bidder')
    agent = item.get('agent')

    return {
        'site': '千里马',
        'channel': item['channel'],
        'spidercode': 'sdxzbiddingsjzypc',
        'area': area,
        'city': city,
        'district': '',
        'comeintime': Int64(int(time.time())),
        'isdownload': False,  # whether downloaded
        'isfailed': False,  # whether failed
        'title': title,  # title
        'href': item['url'],  # link to the notice
        'publishtime': publishtime,  # publish time (string)
        'l_np_publishtime': Int64(publish_ts),  # publish time (timestamp)
        'buyer': item['tenderees'],  # tendering unit
        'toptype': toptype,  # notice type
        'winner': '' if bidder is None else bidder,  # winning bidder
        'agency': '' if agent is None else agent,  # agency
    }


def main():
    """Stream the source collection into the target collection in batches."""
    count = 0
    batch = []

    # Using the client as a context manager closes its connections on
    # exit, including when an insert or a record conversion raises.
    with MongoClient(MONGO_HOST, MONGO_PORT) as client:
        qlm_coll = client[DB_NAME][SOURCE_COLLECTION]
        jy_coll = client[DB_NAME][TARGET_COLLECTION]

        with qlm_coll.find() as cursor:
            for item in cursor:
                batch.append(build_record(item))
                if len(batch) >= BATCH_SIZE:
                    jy_coll.insert_many(batch, ordered=False)
                    count += len(batch)
                    batch = []
                    print('已处理{}条'.format(count))

        # Flush the final, partially filled batch.
        if batch:
            jy_coll.insert_many(batch, ordered=False)
            count += len(batch)
            print('已处理{}条'.format(count))

    print('数据处理结束')


if __name__ == '__main__':
    main()