config.toml 6.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. [mongo] ## data collected on the Bai Xiaohu server
  2. # host = "baibai.ink:28088"
  3. ## host = "127.0.0.1:27017"
  4. # db = "news"
  5. # username = ""
  6. # password = ""
  7. # size = 15
  8. # host = "baibai.ink:28088"
  9. host = "127.0.0.1:27081" ## 87 - NOTE(review): the meaning of 87 is not shown in this file; confirm
  10. db = "hp_news"
  11. username = ""
  12. password = ""
  13. size = 15 ## presumably the connection pool size - TODO confirm against the consumer
  14. direct = true ## presumably requests a direct connection to the host - TODO confirm
  15. [mongon] ## reads the hp_news data
  16. host = "192.168.3.166:27082"
  17. db = "jichunling"
  18. coll = "hp_news" ## collection (table) name
  19. username = ""
  20. password = ""
  21. size = 15 ## presumably the connection pool size - TODO confirm against the consumer
  22. direct = true ## presumably requests a direct connection to the host - TODO confirm
  23. [mongob] ## bidding-announcement data
  24. host = "127.0.0.1:27083"
  25. db = "qfw"
  26. coll = "bidding_yq" ## collection (table) name
  27. username = "SJZY_RWbid_ES" ## NOTE(review): credentials are stored in plain text in this file
  28. password = "SJZY@B4i4D5e6S" ## NOTE(review): consider rotating this and loading it from a secret store
  29. size = 15 ## presumably the connection pool size - TODO confirm against the consumer
  30. direct = true ## presumably requests a direct connection to the host - TODO confirm
  31. [env]
  32. # files = ["20230915惠普_舆情.xlsx"] ## names of the files to import
  33. # files = ["20230914惠普_舆情数据.xlsx","20230918惠普_舆情数据.xlsx","20230919惠普_舆情数据.xlsx","20230920惠普_舆情数据.xlsx","20230921惠普_舆情数据.xlsx","20230922惠普_舆情数据.xlsx","20230925惠普_舆情数据.xlsx","20230926惠普_舆情数据.xlsx","20230927惠普_舆情数据.xlsx","20230928惠普_舆情数据.xlsx","20231007惠普_舆情数据.xlsx","20231008惠普_舆情数据.xlsx","20231009惠普_舆情数据.xlsx","20231010惠普_舆情数据.xlsx","20231011惠普_舆情数据.xlsx","20231012惠普_舆情数据.xlsx","20231013惠普_舆情数据.xlsx","20231016惠普_舆情数据.xlsx","20231017惠普_舆情数据.xlsx","20231018惠普_舆情数据.xlsx","20231019惠普_舆情数据.xlsx","20231020惠普_舆情数据.xlsx","20231023惠普_舆情数据.xlsx","20231024惠普_舆情数据.xlsx","20231025惠普_舆情数据.xlsx","20231026惠普_舆情数据.xlsx","20231027惠普_舆情数据.xlsx","20231030惠普_舆情数据.xlsx","20231031惠普_舆情数据.xlsx","20231101惠普_舆情数据.xlsx","20231102惠普_舆情数据.xlsx","20231103惠普_舆情数据.xlsx","20231106惠普_舆情数据.xlsx","20231107惠普_舆情数据.xlsx"] ## names of the files to import
  34. start = -1 ## -1 means export yesterday's data
  35. end = 0 ## presumably a day offset like start (0 = today) - TODO confirm
  36. shour = 12 ## hour of day for the start time
  37. ehour = 12 ## hour of day for the end time
  38. coll1 = "CorporatePublicOpinion" ## official-website data
  39. coll2 = "news_detail" ## Baidu data
  40. collb = "bidding_yq" ## lua data
  41. keyword = "news_Keywords"
  42. specials = ["μ", "#NAME?", "¥", "Δ", "æ", "¡", "ζ", "Ψ", "Φ", "ώ", "≈","ж","Զ","ń","α","޹","й","ë","л",
  43. "Λ", "Θ", "ß", "±", "Ύ", "φ", "¬", "Й", "Щ"] ## NOTE(review): presumably markers of garbled or invalid text - confirm against the consumer
  44. ## title exclusion words (a single regex alternation; NOTE(review): presumably titles matching it are dropped - confirm against the consumer)
  45. titleout = "(习近平|十八大|十九大|二十大|端午节|中秋节|建军节|建党节|诈骗|主题教育|招标|中标|成交|询价|磋商|竞价|招租|采购意向|马拉松|救人义举|安全生产月|防汛抗旱|社区|爱心|台风|比选公告|教育活动|消费|老党员|开展.+活动|举办.+活动|整改整治|新华|会见|等奖|获奖|进校园|反诈|百姓|专项整治工作|保障|日报|成功举办|新征程|获评|审查|可行性研究|接受.+查|一带一路|故事|荣获|候选人|宣传|专题|慰问|圆满完成|竞争性磋商|政治|思想|人民网|央广网|系统升级|网站维护|比选|入围.+名单|获.+奖|演习|新纪录|刷新.+记录|创新高|巡视|表彰|培训班|考察|涉嫌|减持|卖出|流出|致辞|买入.+股|粮食安全|日本|东京|韩国|首尔|朝鲜|平壤|蒙古|乌兰巴托|越南|河内|老挝|万象|柬埔寨|金边|缅甸|内比都|泰国|曼谷|马来西亚|吉隆坡|新加坡|新加坡|文莱|斯里巴加湾市|菲律宾|马尼拉|印度尼西亚|雅加达|东帝汶|帝力|英国|伦敦|法国|巴黎|德国|柏林|荷兰|阿姆斯特丹|丹麦|哥本哈根|瑞典|斯德哥尔摩|挪威|奥斯陆|芬兰|赫尔辛基|澳大利亚|堪培拉|新西兰|惠灵顿|巴布亚新几内亚|莫尔兹比港|巴西|巴西利亚|阿根廷|布宜诺斯艾利斯|埃及|开罗|阿尔及利亚|阿尔及尔|埃塞俄比亚|亚的斯亚贝巴|索马里|摩加迪沙|俄罗斯|莫斯科|白俄罗斯|明斯克|乌克兰|基辅|加拿大|渥太华|美国|华盛顿|巴西|巴西利亚|墨西哥|墨西哥城|阿尔及利亚|阿尔及尔|埃及|开罗|坦桑尼亚|达累斯萨拉姆|埃塞俄比亚|亚的斯亚贝巴|索马里|摩加迪沙|澳大利亚|堪培拉|新西兰|惠灵顿|巴布亚新几内亚|莫尔兹比港|论坛|峰会|战略合作|召开|调研|合作协议|突破|战略|健康检查|新加披|美国|英国|波黑|竞赛|专题活动|未经批准|铁路|地铁|高速公路|结业|座谈会|公路|中非|进博|领导拜访|会议|会谈|进博会|工作座谈|活动记录|签约|捐赠|证监会|事实不符|股权|合作|净值|下跌|参赛作品|核查意见|多股涨停|重组|市场|协议|广告投放|户型|价格|买入|加仓|公交快线|死亡|出生|融资|专利|卫生|股价|专访|展会|车展|互联网|谣言|进口|书展|党|单一来源|买入|亮相|分红|强制执行|学习强国|持.+股|采购|罚款|特别关注|奖学金|最新|大赛|报警|发行|细则|比赛|涨停|资金|房屋|展览|辞职|博览会|东南亚|秘鲁|基站|住房|浪潮信息|培训|院士|病人|警告|处分|疾病|泄洪|供暖|被查|举办|高速|隧道|非洲|万股|小区|到访|风险|开展.+教育|报告.+发布|成立.+周年|如何建成|成立大会|成立公告|任职资格|增长.+%|涨.+%|跌.+%|利率|开启预售|开售|欧洲|阿布扎|废标公告|生病|年会|交通建设|结果公告|莅临|轨道|最低售价|售价|促销优惠|优惠)"
  46. ## content exclusion words (a single regex alternation; NOTE(review): presumably applied to the article body - confirm against the consumer)
  47. detailout ="(责任编辑|沪ICP|会议指出|通讯员|报道|通讯.+报道|文章来源|.+会在.+成功举办|理财|端午节|中秋节|建军节|建党节|大会期间|致辞)"
  48. ## title keywords (a single regex alternation; NOTE(review): presumably titles matching it are kept - confirm against the consumer)
  49. titlematch = "(开业|正式投用|成立新公司|图书馆正式开馆|项目顺利竣工|学校新建|投产运营|启动运营|竣工验收|正式投用|建成|竣工交付|项目竣工|工程开工|正式开工|全面完工|项目投产|工程竣工|揭牌成立|正式投用|施工完成|封顶|正式竣工|项目开工|建设总包项目|建设项目|办公大楼搬迁|选址|设立分支机构|开新店|门店拓展|项目规划启动会|获批变更|正式成立|获批设立|新员工|地址变更|获批筹建|成立|住所变更|迁址变更|场所变更|正式启用|获批升格|投入运营|投入使用|工商变更|正式运营|新建扩产|招聘|扩大生源|扩招|工程验收|项目交付|公司合并|主体完工|投运)"
  50. key = "4d5206b1b297c1e7b77f9578edcb2cf7.TNU2i8G1oUNdR02i" ## Zhipu API key - NOTE(review): secret is committed in plain text; consider rotating it and loading it from the environment
  51. model ="glm-4-air" ## presumably the model requested with the API key above - TODO confirm against the consumer