采集任务清单.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on 2024-10-18
  4. ---------
  5. @summary:
  6. ---------
  7. @author: Dzr
  8. """
  9. from pymongo import MongoClient
  10. import pandas as pd
  11. to_db = MongoClient('192.168.3.182', 27017)
  12. coll = to_db['31zg_poc']['keyword_company']
  13. suffix_lst = '重机,挖掘机,装载机,泵送,桥泵车,搅拌车,拖泵,搅拌站,车载泵,搅拌车,重起,汽车起重机,履带起重机,桩机,旋挖钻,大旋挖,中旋挖,小旋挖,路机,铣刨机,摊铺机,平地机,压路机,沥青站'.split(',')
  14. suffix_set = set(suffix_lst)
  15. f = '/Users/dongzhaorui/Desktop/qlm数据采集.xlsx'
  16. df = pd.read_excel(f)
  17. df.fillna('', inplace=True)
  18. company_lst = []
  19. for _, i in df.iterrows():
  20. items = i.to_dict()
  21. s_company = str(items['集团名称']).strip()
  22. s_sub_company = str(items['二级局名称']).strip()
  23. if s_company and s_company not in company_lst:
  24. company_lst.append(s_company)
  25. if s_sub_company and s_sub_company not in company_lst:
  26. company_lst.append(s_sub_company)
  27. data = []
  28. for suffix in suffix_set:
  29. print(suffix)
  30. for company in company_lst:
  31. data.append({'s_suffix': suffix, 's_company': company, 's_keyword': f'{company}+{suffix}'})
  32. if len(data) == 100:
  33. coll.insert_many(data, ordered=False)
  34. data = []
  35. if len(data) > 0:
  36. coll.insert_many(data, ordered=False)
  37. print('1234')