1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- # -*- coding: utf-8 -*-
- """
- Created on 2024-10-18
- ---------
- @summary:
- ---------
- @author: Dzr
- """
- from pymongo import MongoClient
- import pandas as pd
# Target MongoDB instance / namespace that receives the keyword documents.
MONGO_HOST = '192.168.3.182'
MONGO_PORT = 27017
MONGO_DB = '31zg_poc'
MONGO_COLLECTION = 'keyword_company'

# Number of documents sent to MongoDB per insert_many call.
BATCH_SIZE = 100

# Product suffixes combined with every company name. The literal contains a
# repeated entry ('搅拌车'); duplicates are dropped at use time.
suffix_lst = '重机,挖掘机,装载机,泵送,桥泵车,搅拌车,拖泵,搅拌站,车载泵,搅拌车,重起,汽车起重机,履带起重机,桩机,旋挖钻,大旋挖,中旋挖,小旋挖,路机,铣刨机,摊铺机,平地机,压路机,沥青站'.split(',')

# Excel workbook holding the company names (columns '集团名称' and '二级局名称').
f = '/Users/dongzhaorui/Desktop/qlm数据采集.xlsx'


def unique_in_order(values):
    """Return the distinct items of *values*, keeping first-seen order.

    Unlike ``set(values)``, the result order is the same on every run, so
    documents are generated and inserted in a reproducible sequence.
    """
    return list(dict.fromkeys(values))


def collect_companies(df):
    """Collect the distinct company names found in *df*.

    For each row the '集团名称' value is considered first, then the
    '二级局名称' value. Missing cells (NaN/None) and values that are empty
    after stripping whitespace are skipped. Names are returned in order of
    first appearance.

    Raises:
        KeyError: if either column is absent from *df*.
    """
    names = []
    for group_name, sub_name in zip(df['集团名称'], df['二级局名称']):
        for raw in (group_name, sub_name):
            # Skip missing cells explicitly instead of blanking the whole
            # frame with fillna(''), which would also mutate the caller's df.
            if pd.isna(raw):
                continue
            name = str(raw).strip()
            if name:
                names.append(name)
    return unique_in_order(names)


def build_keyword_doc(company, suffix):
    """Build the document stored for one company/suffix combination."""
    return {
        's_suffix': suffix,
        's_company': company,
        's_keyword': f'{company}+{suffix}',
    }


def main():
    """Read company names from the workbook and store company+suffix keywords.

    Every distinct company is combined with every distinct suffix, and the
    resulting documents are written to MongoDB in batches of BATCH_SIZE.
    """
    df = pd.read_excel(f)
    companies = collect_companies(df)

    # The context manager closes the client connection even if an insert fails.
    with MongoClient(MONGO_HOST, MONGO_PORT) as client:
        coll = client[MONGO_DB][MONGO_COLLECTION]
        batch = []
        for suffix in unique_in_order(suffix_lst):
            print(suffix)  # progress: one line per suffix
            for company in companies:
                batch.append(build_keyword_doc(company, suffix))
                if len(batch) >= BATCH_SIZE:
                    coll.insert_many(batch, ordered=False)
                    # Start a fresh list: insert_many adds '_id' to the
                    # dicts it was given, so the old batch is not reused.
                    batch = []
        # insert_many rejects an empty list, so only flush a non-empty rest.
        if batch:
            coll.insert_many(batch, ordered=False)
    print('1234')  # completion marker kept from the original script


if __name__ == '__main__':
    main()
|