wcc 1 年間 前
コミット
462ee4c3e6

+ 45 - 0
data_tag/config.go

@@ -0,0 +1,45 @@
+package main
+
+// GlobalConf is the root configuration object. It groups the MongoDB
+// settings, the runtime settings, two Elasticsearch endpoints and the
+// label rules.
+// NOTE(review): presumably decoded from config.toml — confirm in the loader.
+type GlobalConf struct {
+	Mongo  MgoConf              // MongoDB connection settings
+	Env    EnvConf              // runtime / scheduling settings
+	Esa    EsConf               // one Elasticsearch endpoint
+	Esb    EsConf               // a second Elasticsearch endpoint
+	Labels map[string]LabelData // label rule definitions, keyed by string
+}
+
+// MgoConf holds the MongoDB connection settings.
+type MgoConf struct {
+	Host     string // server address
+	DB       string // database name
+	Coll     string // collection to query
+	Username string // login user
+	Password string // login password
+	Size     int    // NOTE(review): presumably the connection pool size — confirm
+	Direct   bool   // NOTE(review): presumably enables a direct connection — confirm
+}
+
+// EnvConf holds the runtime settings.
+type EnvConf struct {
+	PortraitIndex string // NOTE(review): presumably an Elasticsearch index name — confirm
+	PortraitMgo   string // NOTE(review): presumably a MongoDB collection name — confirm
+	Start         int    // NOTE(review): presumably the start of the data window — confirm unit
+	End           int    // NOTE(review): presumably the end of the data window — confirm unit
+	Spec          string // NOTE(review): presumably a cron expression for the scheduled task — confirm
+	Isw           bool   // whether to save the weight carried by a label
+}
+
+// EsConf holds the connection settings of one Elasticsearch endpoint.
+type EsConf struct {
+	URL      string // base URL of the cluster
+	Username string // login user
+	Password string // login password
+	Index    string // index name
+}
+
+// LabelData is the configuration of a single label rule.
+type LabelData struct {
+	Name    string        // label name
+	Field   []string      // fields the rules are matched against
+	Sfield  string        // field the label is saved into
+	Rule    []string      // the concrete rule expressions
+	RegRule []interface{} // DFA of the rules; NOTE(review): presumably built from Rule at load time — confirm
+	Weight  float64       // weight
+}

+ 833 - 0
data_tag/config.toml

@@ -0,0 +1,833 @@
+[mongo]  ## address of the bid-notice (标讯) MongoDB
+## NOTE(review): the commented-out block below contains what look like real credentials; consider removing them from version control.
+#    host = "127.0.0.1:27083"
+#    #        host = "172.17.189.140:27080"
+#    db = "qfw"
+#    coll = ""
+#    username = "SJZY_RWbid_ES"
+#    password = "SJZY@B4i4D5e6S"
+#    direct = true  ## must be enabled when connecting through a local proxy
+
+
+    host = "127.0.0.1:27017"
+    #        host = "172.17.189.140:27080"
+    db = "wcc"
+    coll = ""
+    username = ""
+    password = ""
+#    direct = true  ## must be enabled when connecting through a local proxy
+
+
+
+[esa] ## Huawei Cloud cluster 2
+#    url = "http://172.17.4.184:19908" ## production address
+    url = "http://127.0.0.1:19908"
+    ## NOTE(review): credentials are stored here in plain text; consider supplying them from the environment instead.
+    username = "jybid"
+    password = "Top2023_JEB01i@31"
+
+[esb] ## Huawei Cloud cluster 1
+#    url = "http://172.17.4.184:19905" ## production address
+    url = "http://127.0.0.1:19905"
+    ## NOTE(review): credentials are stored here in plain text; consider supplying them from the environment instead.
+    username = "jybid"
+    password = "Top2023_JEB01i@31"
+
+[env]
+
+    start = -2 ## start of the window: take data from 2 days ago
+    end = -1 ## end of the window: take data up to 1 day ago
+    spec = "0 00 01 * * *"  ## scheduled task, runs every day at 01:00 (six-field cron, presumably seconds first)
+    isw = true ## whether to save the weight
+
+
+[[labels]] ## label rules
+## NOTE(review): [labels.1] matches on "company_name", while the later label tables use "conpamy_name".
+## That looks like a typo that would keep the field from ever matching; verify against the document schema.
+
+    [labels.1]
+    name = "外交" ## label name
+    field = ["company_name"] ## fields the rule is matched against
+    sfield = "company_types" ## field the label is saved into
+    rule =["(中共办公室)"]
+    weight =0.7  ## weight
+
+
+    [labels.2]
+    name = "发展和改革" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(发展和改革|发展治理委员会|发展治理委员会|发展治理服务局|统筹发展局|发展和改革委员会|发展改革局)"]
+    weight =1  ## 权重
+
+    [labels.2a]
+    name = "发展和改革" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(服务业发展局|建设管理局|经济发展部|计划经贸局|投资合作部|经济发展局|科技创新局|经济发展服务局|发展局|开发区创新发展部)"]
+    weight =0.9  ## 权重
+
+
+    [labels.3]
+    name = "科学技术" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(科学技术|科技|工业和科技信息化)"]
+    weight =1  ## 权重
+
+    [labels.4]
+    name = "民族事务" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(民族事务|民族宗教事务|民族宗教局)"]
+    weight =1  ## 权重
+
+    [labels.5]
+    name = "保密局" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(国家保密局|机要保密局|保密局|机要局|保密工作局|密码管理局)"]
+    weight =1  ## 权重
+
+
+    [labels.6]
+    name = "国安局" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(国家安全局)"]
+    weight =1  ## 权重
+
+    [labels.7]
+    name = "司法" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(司法|政法委员会|法律援助中心|行政执法局|法制办|纪检监察)"]
+    weight =1  ## 权重
+
+    [labels.7a]
+    name = "司法" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(公证处|价格成本调查)"]
+    weight =1  ## 权重
+
+
+    [labels.8]
+    name = "法院" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(人民法院)"]
+    weight =1  ## 权重
+
+
+    [labels.9]
+    name = "检察院" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(检察院)"]
+    weight =1  ## 权重
+
+
+    [labels.10]
+    ## NOTE(review): this table is identical to [labels.9] (same name, field, sfield, rule and weight).
+    ## It looks like a duplicate; confirm whether it can be dropped or was meant to define another label.
+    name = "检察院" ## label name
+    field = ["conpamy_name","organizer"] ## fields the rule is matched against
+    sfield = "company_types" ## field the label is saved into
+    rule =["(检察院)"]
+    weight =1  ## weight
+
+
+    [labels.11]
+    name = "人力资源和社会保障" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(人力资源和社会保障|社会事业局|人力资源社会保障局|人力资源和社人保障局|人事和社会保障|人力资源管理局|劳动就业服务中心|人事劳动和社会保障局|人事劳动局|人力资源和社会劳动保障|劳动保障事务所|社会保障服务中心)"]
+    weight =1  ## 权重
+
+    [labels.11a]
+    name = "人力资源和社会保障" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(民间组织管理局|社会事务局|总工会|组织人事局|人员服务管理工作组|社会保险中心|社会保险事业管理局|劳动保障监察大队|养老保险中心|人才和知识分子工作|社会保险管理处|人才服务中心)"]
+    weight =0.9  ## 权重
+
+    [labels.12]
+    name = "生态环境" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(生态环境|环保局|环境督查中心|规划建设环境保护所|自然保护区)"]
+    weight =1  ## 权重
+
+    [labels.13]
+    name = "交通运输" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(交通运输|交通局|交通管理局)"]
+    weight =1  ## 权重
+
+    [labels.13a]
+    name = "交通运输" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(公路事业发展中心|船业发展中心|道路运输业|公路质量检测中心)"]
+    weight =0.8  ## 权重
+
+
+    [labels.14]
+    name = "农业农村" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(农业农村|畜牧局|农业局|农林和农机局|农牧局|农业|农林水牧局|农村工作|乡村振兴|农场|蚕种场)"]
+    weight =1  ## 权重
+
+    [labels.14a]
+    name = "农业农村" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(畜牧规划设计|畜牧兽医站)"]
+    weight =0.8  ## 权重
+
+
+    [labels.15]
+    name = "退役军人事务" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(退役军人事务|老干部局|老干部管理局|退役军人)"]
+    weight =1  ## 权重
+
+
+    [labels.16]
+    name = "人民银行" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(人民银行)"]
+    weight =1  ## 权重
+
+
+    [labels.17]
+    name = "国防" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(国防|统一战线工作部|统战部|国防科技工业|国防科技|国防动员)"]
+    weight =1  ## 权重
+
+
+
+    [labels.18]
+    name = "教育" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(教育|教体局|小学|中学|幼儿园|技校|大学|学校)"]
+    weight =1  ## 权重
+
+    [labels.18a]
+    name = "教育" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(青少年发展中心)"]
+    weight =1  ## 权重
+
+
+    [labels.19]
+    name = "党校" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(党校|干部学院)"]
+    weight =1  ## 权重
+
+    [labels.20]
+    name = "工业和信息化" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(工业和信息化|工业信息化局|信息化和科技局|科技工信局|工业信息化和科技局|工业信息化|工业和科技信息化|通信管理局)"]
+    weight =1  ## 权重
+
+    [labels.21]
+    name = "公安" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(公安|拘留所|人民警察|看守大队|出入境边防检查)"]
+    weight =1  ## 权重
+
+    [labels.22]
+    name = "民政" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(民政|民主促进会|民主同盟|国民党革命委员|民主建国会|农工民主党|民间组织管理局)^(人民政府)"]
+    weight =1  ## 权重
+
+    [labels.23]
+    name = "财政" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(财政)"]
+    weight =1  ## 权重
+
+    [labels.24]
+    name = "自然资源" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(自然资源|环境保护局|煤炭管理局|资源规划|资源局|规划局|农林水牧局|国土资源管理局|规划管理局|开发区规划建设部|规划建设筹备处|测绘地理信息技术中心|地图院)^(医院)"]
+    weight =1  ## 权重
+
+    [labels.24a]
+    name = "自然资源" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(地质|煤业)"]
+    weight =0.7  ## 权重
+
+    [labels.25]
+    name = "住建" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(住房和城乡建设|建设局|房屋征收事务局|建筑业管理局|住房和城乡规划建设管理局|建设指挥部|住建|房保障和房地产管理|住房保障和房屋管理|住房保障局|房产服务中心|住房保障中心|不动产登记和交易中心|房产管理经营|房屋管理事务中心|房产管理局)"]
+    weight =1  ## 权重
+
+
+    [labels.25a]
+    name = "住建" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(城市改造服务局|建设工程质量检测室|房地产监察大队)"]
+    weight =0.8  ## 权重
+
+    [labels.26]
+    name = "水利" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(水利|农林水牧局|水文水资源测报中心|水库管理局|黄河河务局|水库事务中心|水库保障所|水管所|水文水资源)"]
+    weight =1  ## 权重
+
+    [labels.27]
+    name = "商务" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(商务|招商融资促进局|商业联合会|投资服务局|招商局|发展服务中心|融资服务局|招商引资局|协同发展服务中心|城市发展服务局|市场发展中心)"]
+    weight =1  ## 权重
+
+    [labels.28]
+    name = "卫生健康" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(卫生健康|卫生和计划生育|中医管理局|卫生局|人口和计划生育|卫生运动|健康委员会|卫生服务中心|市容环卫|市容卫生|计划生育|卫生所|市容环境卫生)"]
+    weight =1  ## 权重
+
+    [labels.29]
+    name = "应急管理" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(应急管理|安全生产监督管理局|应急救护)"]
+    weight =1  ## 权重
+
+    [labels.30]
+    name = "审计" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(审计|督察局|督查局|监督局|监督稽查|监管局|监督检查|监察厅|纪律检查委员会|监察委员会)"]
+    weight =1  ## 权重
+
+    [labels.31]
+    name = "国有资产监督管理" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(国有资产监督管理|国有资产管理局)"]
+    weight =1  ## 权重
+
+    [labels.32]
+    name = "海关" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(海关|港航管理局|海事局)"]
+    weight =1  ## 权重
+
+    [labels.33]
+    name = "市场监督" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(市场监督|监察局|监督稽查|监管局|监督检查|监察厅|督查局|监督局|物价检查所|调查监审局|12315|投诉举报维权中心|价格成本调查监审队|稽查大队|市价格成本监测所|物价局)"]
+    weight =1  ## 权重
+
+    [labels.33a]
+    name = "市场监督" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(物价管理办公室|盐业管理局)"]
+    weight =1  ## 权重
+
+    [labels.34]
+    name = "证券监督管理" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(证券监督)"]
+    weight =1  ## 权重
+
+    [labels.35]
+    ## Sports label. The rule uses the label's own keyword, as the other single-keyword labels in this file do.
+    ## NOTE(review): keyword list is minimal — extend it with further sports-department names as needed.
+    name = "体育" ## label name
+    field = ["conpamy_name","organizer"] ## fields the rule is matched against
+    sfield = "company_types" ## field the label is saved into
+    rule =["(体育)"]
+    weight =1  ## weight
+
+    [labels.36]
+    ## Statistics label. The rule uses the label's own keyword, as the other single-keyword labels in this file do.
+    ## NOTE(review): keyword list is minimal — extend it with further statistics-department names as needed.
+    name = "统计" ## label name
+    field = ["conpamy_name","organizer"] ## fields the rule is matched against
+    sfield = "company_types" ## field the label is saved into
+    rule =["(统计)"]
+    weight =1  ## weight
+
+    [labels.37]
+    name = "国际发展合作" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(国际发展合作|旅游和外事侨务局)"]
+    weight =1  ## 权重
+
+    [labels.38]
+    name = "税务" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(税务)"]
+    weight =1  ## 权重
+
+    [labels.39]
+    name = "金融" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(金融监督|金融)"]
+    weight =1  ## 权重
+
+    [labels.40]
+    name = "广播电视" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(广播电视|广播局|电视台|广播电影|电视局|广电|广电科技局|旅游和文化广电局|文化和广播影视局|文化广电局|融媒体中心|转播台|新闻传媒中心)"]
+    weight =1  ## 权重
+
+    [labels.41]
+    name = "信访" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(信访)"]
+    weight =1  ## 权重
+
+    [labels.42]
+    name = "知识产权" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(知识产权)"]
+    weight =1  ## 权重
+
+    [labels.43]
+    name = "医疗保障" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(医疗保障)"]
+    weight =1  ## 权重
+
+    [labels.44]
+    name = "新华通讯社" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(新华通讯社|新华社|新闻出版局|杂志社)"]
+    weight =1  ## 权重
+
+    [labels.45]
+    name = "气象" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(气象)"]
+    weight =1  ## 权重
+
+    [labels.46]
+    name = "科学院" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(科学院)"]
+    weight =1  ## 权重
+
+    [labels.47]
+    name = "工程院" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(工程院)"]
+    weight =1  ## 权重
+
+    [labels.48]
+    name = "粮食和物资储备" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(粮食和物资储备|粮食局)"]
+    weight =1  ## 权重
+
+    [labels.49]
+    name = "数据" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(数据)"]
+    weight =1  ## 权重
+
+    [labels.50]
+    name = "烟草专卖" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(烟草专卖|烟草|香烟)"]
+    weight =1  ## 权重
+
+    [labels.51]
+    name = "林业和草原" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(林业和草原|林业|草原|畜牧产业服务中心|农林局|林场)"]
+    weight =1  ## 权重
+
+    [labels.52]
+    name = "民用航空" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(民用航空)"]
+    weight =1  ## 权重
+
+    [labels.53]
+    name = "文物" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(文物|世界文化遗产)"]
+    weight =1  ## 权重
+
+    [labels.54]
+    name = "疾病预防控制" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(疾病预防控制|疾控中心|疾病预防|检疫局)"]
+    weight =1  ## 权重
+
+    [labels.55]
+    name = "消防救援" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(消防救援|消防队)"]
+    weight =1  ## 权重
+
+    [labels.56]
+    name = "药品监督" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(药品监督|药品检验所)"]
+    weight =1  ## 权重
+
+    [labels.57]
+    name = "能源" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(能源|煤炭管理局|煤炭工业管理局|煤炭工业局)"]
+    weight =1  ## 权重
+
+    [labels.58]
+    name = "移民" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(移民)"]
+    weight =1  ## 权重
+
+    [labels.59]
+    name = "铁路" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(铁路)"]
+    weight =1  ## 权重
+
+    [labels.60]
+    name = "邮政" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(邮政)"]
+    weight =1  ## 权重
+
+    [labels.61]
+    name = "中医药" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(中医药)"]
+    weight =1  ## 权重
+
+    [labels.62]
+    name = "外汇" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(外汇)"]
+    weight =1  ## 权重
+
+    [labels.63]
+    name = "供销合作社" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(供销合作社)"]
+    weight =1  ## 权重
+
+    [labels.64]
+    name = "公共资源交易中心" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(公共资源交易中心)"]
+    weight =1  ## 权重
+
+    [labels.65]
+    name = "监狱" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(监狱|戒毒所|看守所|犯管教所|罪犯遣送)"]
+    weight =1  ## 权重
+
+    [labels.66]
+    name = "城乡建设" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(城乡建设|城乡规划管理局|城乡发展|住房和城乡建设|建设局|房屋征收事务局|建筑业管理局|住房和城乡规划建设管理局|建设指挥部|城乡规划设计)"]
+    weight =1  ## 权重
+
+    [labels.67]
+    name = "文旅" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(广电科技局|旅游和文化广电局|文化和广播影视局|旅游局|文化厅|文化局|旅游委员会|旅游发展委|旅游管理区|文化广电局|文化和旅游|文旅|文化服务中心)"]
+    weight =1  ## 权重
+
+    [labels.68]
+    name = "人民防空" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(人民防空|人防办)"]
+    weight =1  ## 权重
+
+    [labels.69]
+    name = "园林" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(园林)"]
+    weight =1  ## 权重
+
+    [labels.70]
+    name = "物流口岸" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(物流口岸)"]
+    weight =1  ## 权重
+
+    [labels.71]
+    name = "大数据" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(大数据)"]
+    weight =1  ## 权重
+
+    [labels.72]
+    name = "政务服务" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(政务服务|综合行政执法|行政服务中心|登记管理局|行政审批和政务信息|行政管理局|政务信息)"]
+    weight =1  ## 权重
+
+    [labels.72a]
+    name = "政务服务" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(检查委员会|行政审批|办事处)"]
+    weight =0.8  ## 权重
+
+    [labels.73]
+    name = "地方史志" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(地方史志)"]
+    weight =1  ## 权重
+
+    [labels.74]
+    name = "住房公积金管理中心" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(住房公积金管理中心|住房公积金)"]
+    weight =1  ## 权重
+
+    [labels.75]
+    name = "仲裁" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(仲裁)"]
+    weight =1  ## 权重
+
+    [labels.76]
+    name = "招商" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(建设投资|投资促进局|招商发展局)"]
+    weight =1  ## 权重
+
+    [labels.76a]
+    name = "招商" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(开发区建设管理部)"]
+    weight =1  ## 权重
+
+    [labels.77]
+    name = "社保中心" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(社保)"]
+    weight =1  ## 权重
+
+    [labels.78]
+    name = "管委会" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(管委会|风景区开发建设委员会|香山湖管理区|管理区管理委员会|景区管理委员会)^(中共|政协|人大)","(示范区|开发区|产业集聚区)(委员会)"]
+    weight =1  ## 权重
+
+    [labels.79]
+    name = "人民政府" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(人民政府|公务员局|公务员管理局|镇政府|街道办事处|国务院研究室|发展研究中心|机关事务|参事室|乡政府|县政府|便民服务中心|区政府)"]
+    weight =1  ## 权重
+
+    [labels.79a]
+    name = "人民政府" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(委巡察办|巡察工作保障中心|食品安全委员会|平安建设办公室|社会治安综合治理中心|敬老院)"]
+    weight =0.8  ## 权重
+
+    [labels.80]
+    name = "工商联" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(工商联合会|工商业联合会)"]
+    weight =1  ## 权重
+
+    [labels.81]
+    name = "残联" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(残疾人联合会|关心下一代工作委员会)"]
+    weight =1  ## 权重
+
+    [labels.82]
+    name = "妇联" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(妇女联合会|妇联)"]
+    weight =1  ## 权重
+
+    [labels.83]
+    name = "艺术联" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(艺术界联合会|艺术联合会)"]
+    weight =1  ## 权重
+
+    [labels.84]
+    name = "侨联" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(侨眷联合会|华侨联合会)"]
+    weight =1  ## 权重
+
+    [labels.85]
+    name = "台联" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(台胞台属联谊会)"]
+    weight =1  ## 权重
+
+    [labels.86]
+    name = "城管" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(城市管理|城市综合执法局|市场综合执法队|车站地区管理处|社会治理和社区服务局)"]
+    weight =1  ## 权重
+
+    [labels.87]
+    name = "编办" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(编制委员会|事业单位登记管理)"]
+    weight =1  ## 权重
+
+    [labels.88]
+    name = "政协" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(政治协商|政协)"]
+    weight =1  ## 权重
+
+    [labels.89]
+    name = "民主党派" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(九三学社|致公党)"]
+    weight =1  ## 权重
+
+    [labels.90]
+    name = "党委" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(共产党|中共)(委员会|研究室|办公室|宣传部|办公厅|招待所|县委)","(群众工作部|直属机关委员会|党群工作部|巡察组|精神文明建设指导委员会|市委办公室|县委|委员会宣传部|市委宣传部)"]
+    weight =1  ## 权重
+
+    [labels.90a]
+    name = "党委" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(区委重点工作|接待办公室|市委政法委)"]
+    weight =0.9  ## 权重
+
+    [labels.91]
+    name = "团委" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(共产主义青年团|共青团|青年团)(委员会|县委)"]
+    weight =1  ## 权重
+
+    [labels.91a]
+    name = "团委" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(希望工程办公室)"]
+    weight =0.8  ## 权重
+
+    [labels.92]
+    name = "人大" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(人大常委会|人大工作联络处|人民代表大会|人大)"]
+    weight =1  ## 权重
+
+    [labels.93]
+    name = "档案局" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(档案局|档案馆)"]
+    weight =1  ## 权重
+
+    [labels.94]
+    name = "武装" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(武装部)"]
+    weight =1  ## 权重
+
+    [labels.95]
+    name = "医院" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(医院|卫生院)"]
+    weight =1  ## 权重
+
+    [labels.96]
+    name = "渔业" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(渔业办|鱼种场)"]
+    weight =1  ## 权重
+
+    [labels.97]
+    name = "学校" ## 标签名称
+    field = ["conpamy_name","organizer"] ## 识别字段
+    sfield = "company_types" ## 标签保存字段
+    rule =["(小学|中学|幼儿园|技校|大学|学校)"]
+    weight =1  ## 权重

+ 49 - 0
data_tag/go.mod

@@ -0,0 +1,49 @@
+module data_tag
+
+go 1.20
+
+require (
+	github.com/spf13/viper v1.19.0
+	jygit.jydev.jianyu360.cn/data_processing/common_utils v0.0.0-20240202055658-e2ef72e18b40
+)
+
+require (
+	github.com/PuerkitoBio/goquery v1.8.0 // indirect
+	github.com/andybalholm/cascadia v1.3.1 // indirect
+	github.com/dchest/captcha v1.0.0 // indirect
+	github.com/fsnotify/fsnotify v1.7.0 // indirect
+	github.com/golang/snappy v0.0.1 // indirect
+	github.com/hashicorp/hcl v1.0.0 // indirect
+	github.com/josharian/intern v1.0.0 // indirect
+	github.com/klauspost/compress v1.17.2 // indirect
+	github.com/magiconair/properties v1.8.7 // indirect
+	github.com/mailru/easyjson v0.7.7 // indirect
+	github.com/mitchellh/mapstructure v1.5.0 // indirect
+	github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe // indirect
+	github.com/olivere/elastic/v7 v7.0.32 // indirect
+	github.com/pelletier/go-toml/v2 v2.2.2 // indirect
+	github.com/pkg/errors v0.9.1 // indirect
+	github.com/sagikazarmark/locafero v0.4.0 // indirect
+	github.com/sagikazarmark/slog-shim v0.1.0 // indirect
+	github.com/sourcegraph/conc v0.3.0 // indirect
+	github.com/spf13/afero v1.11.0 // indirect
+	github.com/spf13/cast v1.6.0 // indirect
+	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/subosito/gotenv v1.6.0 // indirect
+	github.com/xdg-go/pbkdf2 v1.0.0 // indirect
+	github.com/xdg-go/scram v1.1.1 // indirect
+	github.com/xdg-go/stringprep v1.0.3 // indirect
+	github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d // indirect
+	go.mongodb.org/mongo-driver v1.10.1 // indirect
+	go.uber.org/atomic v1.9.0 // indirect
+	go.uber.org/multierr v1.9.0 // indirect
+	golang.org/x/crypto v0.21.0 // indirect
+	golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
+	golang.org/x/net v0.23.0 // indirect
+	golang.org/x/sync v0.6.0 // indirect
+	golang.org/x/sys v0.18.0 // indirect
+	golang.org/x/text v0.14.0 // indirect
+	gopkg.in/ini.v1 v1.67.0 // indirect
+	gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 // indirect
+	gopkg.in/yaml.v3 v3.0.1 // indirect
+)

+ 247 - 0
data_tag/go.sum

@@ -0,0 +1,247 @@
+cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/BurntSushi/toml v1.2.0/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
+github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U=
+github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
+github.com/aws/aws-sdk-go v1.43.21/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
+github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
+github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
+github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
+github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
+github.com/dchest/captcha v1.0.0 h1:vw+bm/qMFvTgcjQlYVTuQBJkarm5R0YSsDKhm1HZI2o=
+github.com/dchest/captcha v1.0.0/go.mod h1:7zoElIawLp7GUMLcj54K9kbw+jEyvz2K0FDdRRYhvWo=
+github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
+github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8vw=
+github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
+github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
+github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
+github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/go-sql-driver/mysql v1.6.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
+github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
+github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
+github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
+github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
+github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
+github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
+github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
+github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4=
+github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
+github.com/gomodule/redigo v1.8.9/go.mod h1:7ArFNvsTjH8GMMzB4uy1snslv2BwmginuMs06a1uzZE=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.7/go.mod h1:n+brtR0CgQNWTVd5ZUFpTBC8YFBDLK/h/bpaJ8/DtOE=
+github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
+github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
+github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
+github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
+github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
+github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
+github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
+github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
+github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
+github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
+github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY=
+github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0=
+github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
+github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
+github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
+github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
+github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe h1:iruDEfMl2E6fbMZ9s0scYfZQ84/6SPL6zC8ACM2oIL0=
+github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
+github.com/nsqio/go-nsq v1.1.0/go.mod h1:vKq36oyeVXgsS5Q8YEO7WghqidAVXQlcFxzQbQTuDEY=
+github.com/olivere/elastic/v7 v7.0.32 h1:R7CXvbu8Eq+WlsLgxmKVKPox0oOwAE/2T9Si5BnvK6E=
+github.com/olivere/elastic/v7 v7.0.32/go.mod h1:c7PVmLe3Fxq77PIfY/bZmxY/TAamBhCzZ8xDOE09a9k=
+github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
+github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM=
+github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
+github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
+github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
+github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ=
+github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4=
+github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE=
+github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ=
+github.com/smartystreets/assertions v1.1.1/go.mod h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo=
+github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM=
+github.com/smartystreets/gunit v1.4.2/go.mod h1:ZjM1ozSIMJlAz/ay4SG8PeKF00ckUp+zMHZXV9/bvak=
+github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
+github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0=
+github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8=
+github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY=
+github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0=
+github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo=
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
+github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
+github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI=
+github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
+github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
+github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
+github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
+github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
+github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
+github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
+github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8=
+github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU=
+github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
+github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
+github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c=
+github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI=
+github.com/xdg-go/scram v1.1.1 h1:VOMT+81stJgXW3CpHyqHN3AXDYIMsx56mEFrB37Mb/E=
+github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g=
+github.com/xdg-go/stringprep v1.0.3 h1:kdwGpVNwPFtjs98xCGkHjQtGKh86rDcRZN17QEMCOIs=
+github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8=
+github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d h1:splanxYIlg+5LfHAM6xpdFEAYOk8iySO56hMFq6uLyA=
+github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
+github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
+go.mongodb.org/mongo-driver v1.10.1 h1:NujsPveKwHaWuKUer/ceo9DzEe7HIj1SlJ6uvXZG0S4=
+go.mongodb.org/mongo-driver v1.10.1/go.mod h1:z4XpeoU6w+9Vht+jAFyLgVrD+jGSQQe0+CBWFHNiHt8=
+go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E=
+go.opentelemetry.io/otel v1.5.0/go.mod h1:Jm/m+rNp/z0eqJc74H7LPwQ3G87qkU/AnnAydAjSAHk=
+go.opentelemetry.io/otel/trace v1.5.0/go.mod h1:sq55kfhjXYr1zVSyexg0w1mpa03AYXR5eyTkB9NPPdE=
+go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
+go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE=
+go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
+go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ=
+go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU=
+go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI=
+go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ=
+go.uber.org/zap v1.22.0/go.mod h1:H4siCOZOrAolnUPJEkfaSjDqyP+BDS0DdDWzwcgt3+U=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
+golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
+golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
+golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g=
+golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k=
+golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
+golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
+golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
+golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
+golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
+golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
+golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ=
+golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
+golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
+golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
+google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
+google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
+google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
+google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
+google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
+google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
+google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
+google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
+google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
+google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
+google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
+gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
+gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
+gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22 h1:VpOs+IwYnYBaFnrNAeB8UUWtL3vEUnzSCL1nVjPhqrw=
+gopkg.in/mgo.v2 v2.0.0-20190816093944-a6b53ec6cb22/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
+gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k=
+gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
+gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+jygit.jydev.jianyu360.cn/data_processing/common_utils v0.0.0-20240202055658-e2ef72e18b40 h1:xTeRmpFgwOdu+NbWg/YntX3MnQpttm7jj33C1+JdBTk=
+jygit.jydev.jianyu360.cn/data_processing/common_utils v0.0.0-20240202055658-e2ef72e18b40/go.mod h1:1Rp0ioZBhikjXHYYXmnzL6RNfvTDM/2XvRB+vuPLurI=

+ 55 - 0
data_tag/init.go

@@ -0,0 +1,55 @@
+package main
+
+import (
+	"github.com/spf13/viper"
+	es "jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic"
+	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
+)
+
+// InitConfig reads the TOML configuration through viper and decodes it into
+// the package-level GF. It registers ./, ./conf/ and ../conf/ as search
+// directories. The error from reading the file is returned as-is; otherwise
+// the result of the decode step is returned.
+func InitConfig() (err error) {
+	// NOTE(review): SetConfigName may override the explicit file set on the
+	// line before it — confirm against the viper version in go.mod.
+	viper.SetConfigFile("config.toml") // explicit config file path
+	viper.SetConfigName("config")      // config file name without extension
+	viper.SetConfigType("toml")        // format, required when the name has no extension
+
+	// Directories registered as config search paths, in the original order.
+	for _, dir := range []string{"./", "./conf/", "../conf/"} {
+		viper.AddConfigPath(dir)
+	}
+
+	if err = viper.ReadInConfig(); err != nil {
+		return err
+	}
+	return viper.Unmarshal(&GF)
+}
+
+// InitMgo builds the shared clients from the global configuration GF: the
+// MongoDB pool (Mgo) and the two Elasticsearch clients (Esa, Esb). Despite
+// its name it initialises all three. GF must already be populated, which
+// InitConfig does.
+func InitMgo() {
+	// Honour the configured pool size; keep the former hard-coded value of 10
+	// when mongo.size is absent or not positive.
+	poolSize := GF.Mongo.Size
+	if poolSize <= 0 {
+		poolSize = 10
+	}
+
+	Mgo = &mongodb.MongodbSim{
+		MongodbAddr: GF.Mongo.Host,
+		Size:        poolSize,
+		DbName:      GF.Mongo.DB,
+		UserName:    GF.Mongo.Username,
+		Password:    GF.Mongo.Password,
+		Direct:      GF.Mongo.Direct,
+	}
+	Mgo.InitPool()
+
+	Esa = &es.Elastic{
+		S_esurl:  GF.Esa.URL,
+		I_size:   5,
+		Username: GF.Esa.Username,
+		Password: GF.Esa.Password,
+	}
+	Esa.InitElasticSize()
+
+	Esb = &es.Elastic{
+		S_esurl:  GF.Esb.URL,
+		I_size:   5,
+		Username: GF.Esb.Username,
+		Password: GF.Esb.Password,
+	}
+	Esb.InitElasticSize()
+}

+ 89 - 0
data_tag/main.go

@@ -0,0 +1,89 @@
+package main
+
+import (
+	"fmt"
+	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	es "jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic"
+	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
+	"log"
+	"regexp"
+)
+
+var (
+	Mgo *mongodb.MongodbSim // shared MongoDB client, assigned in InitMgo
+	GF  GlobalConf // global configuration, filled by InitConfig
+	Esa *es.Elastic // Elasticsearch client built from GF.Esa in InitMgo
+	Esb *es.Elastic // Elasticsearch client built from GF.Esb in InitMgo
+	//Labels []LabelData // all label rules, global
+)
+
+type DataRes struct { // DataRes is one tag entry that main writes to a document: a label name and its weight.
+	Name   string // label name, taken from the merged label's Name
+	Weight float64 // label weight, taken from the merged label's Weight
+}
+
+// main tags every document of the "bidding" collection in database "wcc"
+// with the labels defined in the configuration and writes the result back.
+//
+// For each document, every label whose compiled rules match one of the
+// label's configured fields is collected. MergeLabelData groups those hits by
+// target field (Sfield), and each group is stored on the document as a list
+// of {Name, Weight} entries.
+func main() {
+	// The pattern is a constant, so a compile failure is a programming error.
+	REG = regexp.MustCompile(`\(.*?\)\d*`)
+	if err := InitConfig(); err != nil {
+		// Without configuration there are no connection settings or labels.
+		log.Fatalln("load config:", err)
+	}
+	InitMgo()
+
+	defer util.Catch()
+	sess := Mgo.GetMgoConn()
+	defer Mgo.DestoryMongoConn(sess)
+
+	// Compile each label's rules once up front; the rules do not depend on
+	// the document, so there is no need to rebuild them per record.
+	labels := make([]LabelData, 0, len(GF.Labels))
+	for _, v := range GF.Labels {
+		v.RegRule = DealRules(v.Rule)
+		if v.Name == "自然资源" {
+			log.Println(v)
+		}
+		labels = append(labels, v)
+	}
+
+	it := sess.DB("wcc").C("bidding").Find(nil).Select(nil).Iter()
+	fmt.Println("taskRun 开始")
+	count := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
+		ResMap := make([]LabelData, 0)
+		id := mongodb.BsonIdToSId(tmp["_id"])
+		for _, v := range labels {
+			// Check every configured source field of the label; a label is
+			// recorded once per matching field, as before.
+			for _, vv := range v.Field {
+				text := util.ObjToString(tmp[vv])
+				if text == "" {
+					continue
+				}
+				if rs, _ := DFAAnalyRules(text, v.RegRule); rs {
+					ResMap = append(ResMap, v)
+				}
+			}
+		}
+
+		// Merge duplicate labels, grouped by the field they are saved to.
+		mergedMap := MergeLabelData(ResMap)
+		for key, values := range mergedMap {
+			fmt.Printf("Sfield: %s\n", key)
+			datas := make([]DataRes, 0, len(values))
+			for _, value := range values {
+				fmt.Printf("  Name: %s, Weight: %f\n", value.Name, value.Weight)
+				datas = append(datas, DataRes{
+					Name:   value.Name,
+					Weight: value.Weight,
+				})
+			}
+			if len(datas) == 0 {
+				continue
+			}
+			// Write the complete list once per field instead of re-writing a
+			// growing partial list after every appended entry.
+			update := map[string]interface{}{
+				key: datas,
+			}
+			Mgo.UpdateById("bidding", id, map[string]interface{}{"$set": update})
+		}
+
+		// Defensive: start the next document from an empty map in case the
+		// iterator decodes into the existing map without clearing it.
+		tmp = make(map[string]interface{})
+	}
+
+}

+ 18 - 0
data_tag/readme.md

@@ -0,0 +1,18 @@
+
+### 功能
+给数据打标记,使用分类程序规则结构;依据配置文件配置 `labels` 标记数据,标记结果如下:
+```json
+    "base_types" : [
+        {
+            "name" : "发展和改革",
+            "weight" : 1.0
+        },
+        {
+            "name" : "党委",
+            "weight" : 0.7
+        }
+    ]
+
+```
+
+

+ 27 - 0
data_tag/tag_test.go

@@ -0,0 +1,27 @@
+package main
+
+import (
+	"fmt"
+	"testing"
+)
+
+// TestMergeLabelData feeds MergeLabelData a mix of repeated and unique
+// Name/Sfield pairs and prints the merged groups for manual inspection.
+// It makes no assertions.
+func TestMergeLabelData(t *testing.T) {
+	input := []LabelData{
+		{Name: "A", Sfield: "SF1", Weight: 2.0},
+		{Name: "A", Sfield: "SF1", Weight: 3.0},
+		{Name: "A", Sfield: "SF1", Weight: 1.5},
+		{Name: "B", Sfield: "SF2", Weight: 1.5},
+		{Name: "B", Sfield: "SF2", Weight: 0.5},
+		{Name: "C", Sfield: "SF3", Weight: 1.5},
+	}
+
+	for sfield, group := range MergeLabelData(input) {
+		fmt.Printf("Sfield: %s\n", sfield)
+		for _, item := range group {
+			fmt.Printf("  Name: %s, Weight: %f\n", item.Name, item.Weight)
+		}
+	}
+}

+ 325 - 0
data_tag/tools.go

@@ -0,0 +1,325 @@
+package main
+
+import (
+	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
+	"log"
+	"math"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// REG splits the "match" part of a keyword rule into groups; DealRules calls
+// REG.FindAllString on it. The variable is only declared here.
+// NOTE(review): presumably assigned during start-up elsewhere in the package;
+// DealRules dereferences it unconditionally — confirm it is always set first.
+var REG *regexp.Regexp
+
+// RuleDFA is one compiled keyword rule: a list of required keyword groups plus
+// an optional exclusion group.
+type RuleDFA struct {
+	Match       []DFA // keyword groups that must be found in the text
+	MatchNum    []int // number of distinct keywords required from the group at the same index in Match
+	MisMatch    DFA   // keywords that must not be found in the text
+	MisMatchNum int   // number of distinct excluded keywords that makes the rule fail
+}
+
+// DFA is a keyword trie. Link is the root node; child nodes are stored under
+// one-byte string keys next to the bookkeeping entries written by AddWordAll.
+type DFA struct {
+	Link map[string]interface{}
+}
+
+// DealRules 处理识别规则
+func DealRules(rules []string) (i_rule []interface{}) {
+	for _, r := range rules {
+		if strings.HasPrefix(r, "'") && strings.HasSuffix(r, "'") { //正则
+			rs := []rune(r)
+			ru := string(rs[1 : len(rs)-1])
+			rureg, err := regexp.Compile(ru)
+			if err != nil {
+				log.Println("error---rule:", r)
+				continue
+			}
+			i_rule = append(i_rule, []interface{}{rureg}...)
+		} else { //规则,加入到敏感词匹配
+			matchnum := 0
+			mismatchnum := 0
+			isnum1 := false
+			isnum2 := false
+			numArr := make([]int, 0)
+			ruleDFA := &RuleDFA{
+				Match:    []DFA{},
+				MisMatch: DFA{},
+			}
+			tmpArr := strings.Split(r, "^")
+			matchTmp := tmpArr[0]
+			ruleTextArr := REG.FindAllString(matchTmp, -1)
+			for _, match := range ruleTextArr {
+				matchnum, isnum1 = GetNum(match)
+				numArr = append(numArr, matchnum)
+				matchArr := GetRule(match, isnum1)
+				tmpDFA := DFA{
+					Link: make(map[string]interface{}),
+				}
+				tmpDFA.AddWord(matchArr...)
+				ruleDFA.Match = append(ruleDFA.Match, tmpDFA)
+			}
+			if len(tmpArr) == 2 {
+				mismatch := tmpArr[1]
+				mismatchnum, isnum2 = GetNum(mismatch)
+				mismatchArr := GetRule(mismatch, isnum2)
+				ruleDFA.MisMatch.AddWord(mismatchArr...)
+			}
+			ruleDFA.MatchNum = numArr
+			ruleDFA.MisMatchNum = mismatchnum
+			i_rule = append(i_rule, []interface{}{ruleDFA}...)
+		}
+	}
+	return
+}
+
+func (d *DFA) AddWord(keys ...string) {
+	d.AddWordAll(true, keys...)
+}
+
+func (d *DFA) AddWordAll(haskey bool, keys ...string) {
+	if d.Link == nil {
+		d.Link = make(map[string]interface{})
+	}
+	for _, key := range keys {
+		nowMap := &d.Link
+		for i := 0; i < len(key); i++ {
+			kc := key[i : i+1]
+			if v, ok := (*nowMap)[kc]; ok {
+				nowMap, _ = v.(*map[string]interface{})
+			} else {
+				newMap := map[string]interface{}{}
+				newMap["YN"] = "0"
+				(*nowMap)[kc] = &newMap
+				nowMap = &newMap
+			}
+			if i == len(key)-1 {
+				(*nowMap)["YN"] = "1"
+				if haskey {
+					(*nowMap)["K"] = key
+				}
+			}
+		}
+	}
+}
+
+func (d *DFA) CheckSensitiveWord(src string, n int) (bool, []string) {
+	res := make([]string, 0)
+	tmpMap := make(map[string]int)
+	for j := 0; j < len(src); j++ {
+		nowMap := &d.Link
+		for i := j; i < len(src); i++ {
+			word := src[i : i+1]
+			nowMap, _ = (*nowMap)[word].(*map[string]interface{})
+			if nowMap != nil { // 存在,则判断是否为最后一个
+				if "1" == util.ObjToString((*nowMap)["YN"]) {
+					s := util.ObjToString((*nowMap)["K"])
+					tmpMap[s] = 1
+					//nowMap = &d.Link //匹配到之后继续匹配后边的内容
+				}
+			} else {
+				//nowMap = &d.Link
+				break
+			}
+		}
+	}
+	if len(tmpMap) >= n {
+		for k, _ := range tmpMap {
+			res = append(res, k)
+		}
+		return true, res
+	}
+	return false, []string{}
+}
+
+// ObjArrToStringArr converts a []interface{} into a []string, keeping only the
+// elements that are strings and dropping everything else. A nil input yields
+// nil; a non-nil input always yields a non-nil (possibly empty) slice.
+func ObjArrToStringArr(old []interface{}) []string {
+	// Best-effort conversion: any panic is swallowed.
+	defer func() { _ = recover() }()
+	if old == nil {
+		return nil
+	}
+	strs := make([]string, 0)
+	for _, item := range old {
+		if s, isString := item.(string); isString {
+			strs = append(strs, s)
+		}
+	}
+	return strs
+}
+
+// GetRule extracts the "|"-separated keywords from one rule group.
+//
+// isnum is the second value returned by GetNum for the same text: true when
+// the group ends with ")" and carries no count (e.g. "(a|b)"), false when a
+// one-byte count follows the closing parenthesis (e.g. "(a|b)2"). Text that
+// does not start with "(" or is too short to hold a group yields nil.
+func GetRule(text string, isnum bool) (matchArr []string) {
+	if !strings.HasPrefix(text, "(") {
+		return nil
+	}
+	if isnum {
+		// "(a|b)": strip the surrounding parentheses.
+		if len(text) >= 2 && strings.HasSuffix(text, ")") {
+			return strings.Split(text[1:len(text)-1], "|")
+		}
+		return nil
+	}
+	// "(a|b)2": strip "(" in front and ")" plus the count byte at the end.
+	// Anything shorter than three bytes cannot have that shape and would make
+	// the slice expression panic.
+	if len(text) < 3 {
+		return nil
+	}
+	return strings.Split(text[1:len(text)-2], "|")
+}
+
+// GetNum returns how many distinct keywords of a rule group must be matched.
+//
+// The boolean is true when rule ends with ")" — the group has no explicit
+// count and 1 is returned. Otherwise it is false and the count is parsed from
+// the last character of rule; a last character that is not a digit yields 0.
+// An empty rule yields (0, false).
+func GetNum(rule string) (int, bool) {
+	if strings.HasSuffix(rule, ")") {
+		return 1, true
+	}
+	s := []rune(rule)
+	if len(s) == 0 {
+		// Nothing to parse; indexing the last rune would panic.
+		return 0, false
+	}
+	// The parse error is ignored on purpose: a non-digit suffix counts as 0.
+	num, _ := strconv.Atoi(string(s[len(s)-1]))
+	return num, false
+}
+
+// IntAll converts num to an int via IntAllDef, using 0 as the fallback for
+// unsupported types.
+func IntAll(num interface{}) int {
+	return IntAllDef(num, 0)
+}
+
+func IntAllDef(num interface{}, defaultNum int) int {
+	if i, ok := num.(int); ok {
+		return int(i)
+	} else if i0, ok0 := num.(int32); ok0 {
+		return int(i0)
+	} else if i1, ok1 := num.(float64); ok1 {
+		return int(i1)
+	} else if i2, ok2 := num.(int64); ok2 {
+		return int(i2)
+	} else if i3, ok3 := num.(float32); ok3 {
+		return int(i3)
+	} else if i4, ok4 := num.(string); ok4 {
+		in, _ := strconv.Atoi(i4)
+		return int(in)
+	} else if i5, ok5 := num.(int16); ok5 {
+		return int(i5)
+	} else if i6, ok6 := num.(int8); ok6 {
+		return int(i6)
+	} else {
+		return defaultNum
+	}
+}
+
+// TagDFAAnalyRules collects the keywords that the keyword rules in rules find
+// in text. Entries that are not *RuleDFA, and rules whose MatchNum and Match
+// lengths differ, are skipped. Within a rule the groups are checked in order
+// and checking stops at the first group that does not reach its required
+// count.
+// NOTE(review): keywords from groups checked before a failing group stay in
+// the result — confirm this is intended.
+func TagDFAAnalyRules(text string, rules []interface{}) (res []string) {
+	defer util.Catch()
+	for _, rule := range rules {
+		dfaRule, isDFA := rule.(*RuleDFA)
+		if !isDFA || len(dfaRule.MatchNum) != len(dfaRule.Match) {
+			continue
+		}
+		for idx, need := range dfaRule.MatchNum {
+			if need < 1 {
+				continue
+			}
+			matched, words := dfaRule.Match[idx].CheckSensitiveWord(text, need)
+			if !matched {
+				// This group failed; move on to the next rule.
+				break
+			}
+			res = append(res, words...)
+		}
+	}
+	return
+}
+
+// DFAAnalyRules reports whether text satisfies any of the compiled rules and
+// stops at the first rule that does.
+//
+// A *regexp.Regexp rule matches when the pattern occurs in text; the returned
+// slice then holds the pattern source. A *RuleDFA rule matches when every
+// group with a required count of at least 1 reaches that count and the
+// exclusion group (if MisMatchNum >= 1) does not; the returned slice then
+// holds the keywords found. When no rule matches the result is (false, nil).
+func DFAAnalyRules(text string, rules []interface{}) (bool, []string) {
+	for _, rule := range rules {
+		if re, isReg := rule.(*regexp.Regexp); isReg {
+			if re.MatchString(text) {
+				return true, []string{re.String()}
+			}
+			continue
+		}
+
+		dfaRule, isDFA := rule.(*RuleDFA)
+		if !isDFA {
+			continue
+		}
+
+		// Required groups: all of them must reach their count.
+		var hits []string
+		allMatched := true
+		if len(dfaRule.MatchNum) == len(dfaRule.Match) {
+			for idx, need := range dfaRule.MatchNum {
+				if need < 1 {
+					continue
+				}
+				ok, words := dfaRule.Match[idx].CheckSensitiveWord(text, need)
+				if !ok {
+					allMatched = false
+					break
+				}
+				hits = append(hits, words...)
+			}
+		}
+		if !allMatched {
+			continue
+		}
+
+		// Exclusion group: enough excluded keywords in text reject the rule.
+		if dfaRule.MisMatchNum >= 1 {
+			if excluded, _ := dfaRule.MisMatch.CheckSensitiveWord(text, dfaRule.MisMatchNum); excluded {
+				continue
+			}
+		}
+		return true, hits
+	}
+	return false, nil
+}
+
+// MergeLabelData groups labels by their Sfield. Labels in the same group that
+// share a Name are collapsed into the first such entry, whose Weight becomes
+// the running sum rounded to two decimals; other labels are appended in input
+// order.
+func MergeLabelData(labelDatas []LabelData) map[string][]LabelData {
+	grouped := make(map[string][]LabelData)
+	for _, item := range labelDatas {
+		bucket := grouped[item.Sfield]
+
+		// Look for an entry with the same Name already in this group.
+		pos := -1
+		for i := range bucket {
+			if bucket[i].Name == item.Name {
+				pos = i
+				break
+			}
+		}
+		if pos >= 0 {
+			// bucket shares its backing array with grouped[item.Sfield], so
+			// updating it in place updates the stored group.
+			bucket[pos].Weight = round(bucket[pos].Weight+item.Weight, 2)
+			continue
+		}
+		grouped[item.Sfield] = append(bucket, item)
+	}
+	return grouped
+}
+
+// 对浮点数进行四舍五入保留指定位数小数
+func round(num float64, decimalPlaces int) float64 {
+	var multiplier float64 = 1
+	for i := 0; i < decimalPlaces; i++ {
+		multiplier *= 10
+	}
+	return math.Round(num*multiplier) / multiplier
+}

+ 0 - 1
data_tmp/test.go

@@ -1 +0,0 @@
-package main

+ 15 - 0
doubao/go.mod

@@ -0,0 +1,15 @@
+module doubao
+
+go 1.20
+
+require (
+	github.com/itcwc/go-zhipu v0.0.0-20240626065325-ffc8bf1cfaaa
+	github.com/volcengine/volcengine-go-sdk v1.0.150
+)
+
+require (
+	github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
+	github.com/google/uuid v1.3.0 // indirect
+	github.com/jmespath/go-jmespath v0.4.0 // indirect
+	github.com/volcengine/volc-sdk-golang v1.0.23 // indirect
+)

+ 94 - 0
doubao/go.sum

@@ -0,0 +1,94 @@
+cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
+github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
+github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
+github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
+github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
+github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
+github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY=
+github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
+github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
+github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
+github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
+github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
+github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
+github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
+github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
+github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
+github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
+github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
+github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
+github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
+github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
+github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/itcwc/go-zhipu v0.0.0-20240626065325-ffc8bf1cfaaa h1:iOly0dSYv9AdoWfWt3uk4IF4O/nW+fyXV9rnC87UC7s=
+github.com/itcwc/go-zhipu v0.0.0-20240626065325-ffc8bf1cfaaa/go.mod h1:z7QZm7ol2nikFFGHwArJr1NTtBSE0M0g9MvHKxm1Sw0=
+github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
+github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
+github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
+github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
+github.com/kr/pretty v0.2.0/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
+github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
+github.com/volcengine/volc-sdk-golang v1.0.23 h1:anOslb2Qp6ywnsbyq9jqR0ljuO63kg9PY+4OehIk5R8=
+github.com/volcengine/volc-sdk-golang v1.0.23/go.mod h1:AfG/PZRUkHJ9inETvbjNifTDgut25Wbkm2QoYBTbvyU=
+github.com/volcengine/volcengine-go-sdk v1.0.150 h1:1e169un9p5gMuyDG32IqfXkHjR+FG4BSio8PgjP2sZQ=
+github.com/volcengine/volcengine-go-sdk v1.0.150/go.mod h1:oht5AKDJsk0fY6tV2ViqaVlOO14KSRmXZlI8ikK60Tg=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
+golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
+golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
+golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
+golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
+golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
+golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
+golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
+golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
+golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
+google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
+google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
+google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
+google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
+google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
+google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
+google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
+google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
+google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
+google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
+google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
+google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
+google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
+google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
+google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
+gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
+honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=

+ 117 - 0
doubao/main.go

@@ -0,0 +1,117 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"github.com/volcengine/volcengine-go-sdk/service/arkruntime"
+	"github.com/volcengine/volcengine-go-sdk/service/arkruntime/model"
+	"github.com/volcengine/volcengine-go-sdk/volcengine"
+)
+
+// main runs the currently selected demo call; swap the commented line to try
+// the other backend.
+func main() {
+	//botChat()
+	zhiPu()
+}
+
+// normalChat sends one fixed system + user prompt to a Doubao chat endpoint
+// and prints the text of the first choice. Request errors and empty replies
+// are reported on stdout.
+func normalChat() {
+	// NOTE(review): the API key is hard-coded; consider loading it from
+	// configuration or the environment.
+	client := arkruntime.NewClientWithApiKey("f9fd581f-8bea-488a-86b1-49c7d46176b5")
+	ctx := context.Background()
+
+	fmt.Println("----- standard request -----")
+	req := model.ChatCompletionRequest{
+		Model: "ep-20240726175416-5x8pq",
+		Messages: []*model.ChatCompletionMessage{
+			{
+				Role: model.ChatMessageRoleSystem,
+				Content: &model.ChatCompletionMessageContent{
+					StringValue: volcengine.String("你是一家打印机供应商,请根据我给你提供的线索评测是否有购入打印机的需求,并给出权重。\n通常需要新购打印机的场景有:\n1、医院、学校、事业单位的办公楼新建、扩建、搬迁\n2、新公司成立、公司变更地址\n我的正文如下:"),
+				},
+			},
+			{
+				Role: model.ChatMessageRoleUser,
+				Content: &model.ChatCompletionMessageContent{
+					StringValue: volcengine.String("2024年7月4日,来源于国家金融监督管理总局威海监管分局的消息,泰山财产保险股份有限公司威海市荣成支公司获批变更营业场所。经审查,同意其营业场所由原“荣成市南山中路19号二楼(201、202、203、204、205)”变更为“荣成市明珠路203号楼303一楼(部分区域)、二楼”。变更营业场所后,公司不得在原址再行办理保险业务,并须及时拆除原址标识及标牌。\n关注同花顺财经(ths518),获取更多机会\n\n输出结果格式如下:\n相关标签:相关/不相关 相关权重:0-1之间,以JSON格式给我"),
+				},
+			},
+		},
+	}
+
+	resp, err := client.CreateChatCompletion(ctx, req)
+	if err != nil {
+		fmt.Printf("standard chat error: %v\n", err)
+		return
+	}
+	// Guard the index and both pointers before dereferencing the reply text.
+	if len(resp.Choices) == 0 ||
+		resp.Choices[0].Message.Content == nil ||
+		resp.Choices[0].Message.Content.StringValue == nil {
+		fmt.Println("standard chat error: empty response")
+		return
+	}
+	fmt.Println(*resp.Choices[0].Message.Content.StringValue)
+
+	//fmt.Println("----- streaming request -----")
+	//req = model.ChatCompletionRequest{
+	//	Model: "${YOUR_ENDPOINT_ID}",
+	//	Messages: []*model.ChatCompletionMessage{
+	//		{
+	//			Role: model.ChatMessageRoleSystem,
+	//			Content: &model.ChatCompletionMessageContent{
+	//				StringValue: volcengine.String("你是豆包,是由字节跳动开发的 AI 人工智能助手"),
+	//			},
+	//		},
+	//		{
+	//			Role: model.ChatMessageRoleUser,
+	//			Content: &model.ChatCompletionMessageContent{
+	//				StringValue: volcengine.String("常见的十字花科植物有哪些?"),
+	//			},
+	//		},
+	//	},
+	//}
+	//stream, err := client.CreateChatCompletionStream(ctx, req)
+	//if err != nil {
+	//	fmt.Printf("stream chat error: %v\n", err)
+	//	return
+	//}
+	//defer stream.Close()
+	//
+	//for {
+	//	recv, err := stream.Recv()
+	//	if err == io.EOF {
+	//		return
+	//	}
+	//	if err != nil {
+	//		fmt.Printf("Stream chat error: %v\n", err)
+	//		return
+	//	}
+	//
+	//	if len(recv.Choices) > 0 {
+	//		fmt.Print(recv.Choices[0].Delta.Content)
+	//	}
+	//}
+}
+
+// botChat sends one fixed user prompt to a Coze bot through the bot chat API,
+// then prints the text of the first choice and the URLs of any references.
+// Known issue: this does not work yet because the BotId is wrong.
+func botChat() {
+	// NOTE(review): the API key is hard-coded; consider loading it from
+	// configuration or the environment.
+	client := arkruntime.NewClientWithApiKey("f9fd581f-8bea-488a-86b1-49c7d46176b5")
+	ctx := context.Background()
+
+	fmt.Println("----- standard request -----")
+	req := model.BotChatCompletionRequest{
+		BotId: "7396876125751836712",
+		Messages: []*model.ChatCompletionMessage{
+			{
+				Role: model.ChatMessageRoleUser,
+				Content: &model.ChatCompletionMessageContent{
+					StringValue: volcengine.String("2024年7月4日,来源于国家金融监督管理总局威海监管分局的消息,泰山财产保险股份有限公司威海市荣成支公司获批变更营业场所。经审查,同意其营业场所由原“荣成市南山中路19号二楼(201、202、203、204、205)”变更为“荣成市明珠路203号楼303一楼(部分区域)、二楼”。变更营业场所后,公司不得在原址再行办理保险业务,并须及时拆除原址标识及标牌。\n关注同花顺财经(ths518),获取更多机会\n\n输出结果格式如下:\n相关标签:相关/不相关 相关权重:0-1之间,以JSON格式给我"),
+				},
+			},
+		},
+	}
+
+	resp, err := client.CreateBotChatCompletion(ctx, req)
+	if err != nil {
+		fmt.Printf("standard chat error: %v\n", err)
+		return
+	}
+	// Guard the index and both pointers before dereferencing the reply text.
+	if len(resp.Choices) == 0 ||
+		resp.Choices[0].Message.Content == nil ||
+		resp.Choices[0].Message.Content.StringValue == nil {
+		fmt.Println("standard chat error: empty response")
+		return
+	}
+	fmt.Println(*resp.Choices[0].Message.Content.StringValue)
+	// Ranging over a nil slice is a no-op, so no nil check is needed.
+	for _, ref := range resp.References {
+		fmt.Printf("reference url: %s\n", ref.Url)
+	}
+}

+ 2 - 0
doubao/readme.txt

@@ -0,0 +1,2 @@
+
+豆包 大模型 api 调用

+ 49 - 0
doubao/zhipu.go

@@ -0,0 +1,49 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	zhipu "github.com/itcwc/go-zhipu/model_api"
+	"log"
+	"strings"
+)
+
+// zhiPu sends one fixed prompt to the Zhipu AI "glm-4-flash" model and prints
+// the JSON object found in the reply. An empty map is printed when the reply
+// has no usable content or the content is not valid JSON.
+func zhiPu() {
+	// NOTE(review): fixed value passed to BeCommonModel as the token expiry —
+	// confirm whether the library expects a timestamp or a lifetime.
+	expireAtTime := int64(1719803252)
+	params := zhipu.PostParams{
+		Model: "glm-4-flash",
+		Messages: []zhipu.Message{
+			{
+				Role:    "user", // role of the message sender; see the API documentation
+				Content: "你是一家打印机供应商,请根据我给你提供的线索评测是否有购入打印机的需求,并给出权重。\\n通常需要新购打印机的场景有:\\n1、医院、学校、事业单位的办公楼新建、扩建、搬迁\\n2、新公司成立、公司变更地址\\n我的正文如下:\\n\\n2024年7月4日,来源于国家金融监督管理总局威海监管分局的消息,泰山财产保险股份有限公司威海市荣成支公司获批变更营业场所。经审查,同意其营业场所由原“荣成市南山中路19号二楼(201、202、203、204、205)”变更为“荣成市明珠路203号楼303一楼(部分区域)、二楼”。变更营业场所后,公司不得在原址再行办理保险业务,并须及时拆除原址标识及标牌。\\n关注同花顺财经(ths518),获取更多机会\\n\\n输出结果格式如下:\\n{相关标签:相关, 相关权重:0.8},输出结果以JSON形式返回;相关标签 只有相关和不相关二个结果;相关权重是0-1之间的数字",
+			},
+		},
+	}
+
+	// NOTE(review): the API key is hard-coded; consider loading it from
+	// configuration or the environment.
+	apiKey := "6c86cea8659ff1d33b161ea7213ea97c.m4OcENaRan8NeLSZ"
+
+	postResponse, err := zhipu.BeCommonModel(expireAtTime, params, apiKey)
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+
+	rest := make(map[string]interface{})
+	if content, ok := zhipuReplyContent(postResponse); ok {
+		content = zhipuStripCodeFence(content)
+		if err = json.Unmarshal([]byte(content), &rest); err != nil {
+			log.Println("Unmarshal err", err, "content:", content)
+		}
+	}
+	fmt.Println(rest)
+}
+
+// zhipuReplyContent returns choices[0].message.content from a decoded chat
+// response, or false when any level is missing or has an unexpected type.
+func zhipuReplyContent(resp map[string]interface{}) (string, bool) {
+	choices, ok := resp["choices"].([]interface{})
+	if !ok || len(choices) == 0 {
+		return "", false
+	}
+	choice, ok := choices[0].(map[string]interface{})
+	if !ok {
+		return "", false
+	}
+	message, ok := choice["message"].(map[string]interface{})
+	if !ok {
+		return "", false
+	}
+	content, ok := message["content"].(string)
+	return content, ok
+}
+
+// zhipuStripCodeFence removes a surrounding Markdown code fence (``` or
+// ```json) from s, leaving the payload itself untouched.
+func zhipuStripCodeFence(s string) string {
+	s = strings.TrimSpace(s)
+	if !strings.HasPrefix(s, "```") {
+		return s
+	}
+	s = strings.TrimPrefix(s, "```")
+	s = strings.TrimPrefix(s, "json")
+	s = strings.TrimSuffix(strings.TrimSpace(s), "```")
+	return strings.TrimSpace(s)
+}

BIN
export_bidding/getXiaMenWinner


+ 1 - 1
getEs/clickhouse.go

@@ -108,7 +108,7 @@ func click() {
 	username := "datascbi"
 	password := "Da#Bi20221111SC"
 	//host := "127.0.0.1:4001"
-	host := "172.17.4.242:4000"
+	host := "172.17.162.25:4000"
 	database := "global_common_data"
 	dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", username, password, host, database)
 	// 连接到数据库

+ 84 - 10
getEs/main.go

@@ -5,6 +5,8 @@ import (
 	"encoding/json"
 	"fmt"
 	"github.com/olivere/elastic/v7"
+	"gorm.io/driver/mysql"
+	"gorm.io/gorm"
 	"io"
 	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
@@ -17,7 +19,7 @@ func main() {
 	/**
 	getProjectData  click 是一起使用的,统计获取中标企业信息
 	*/
-	getProjectDataFromEs() //1.拉取项目中标成交数据
+	//getProjectDataFromEs() //1.拉取项目中标成交数据
 	//click() //2.处理项目数据,写入clickhouse
 	//click2()
 	//dealData()
@@ -25,6 +27,8 @@ func main() {
 	//getProject()
 	//getQyLimitData()
 	//getBiddingData()
+	//getQyxytData()
+	//getTidb()
 	log.Println("over ------------------ over")
 }
 
@@ -367,11 +371,15 @@ func getQyxytData() {
 	//2024-1 - 2024-4;1704038400-1711900800
 	//2023-10-1 2024-1-1;1696089600-1704038400
 	//城市范围
-	areaTermsQuery := elastic.NewTermsQuery("company_city", "平顶山市")
-	rangeQuery := elastic.NewRangeQuery("establish_date").Gte(1704038400)
-	query := elastic.NewBoolQuery().
-		Must(areaTermsQuery).
-		Must(rangeQuery)
+	//areaTermsQuery := elastic.NewTermsQuery("company_city", "北京市")
+	//rangeQuery := elastic.NewRangeQuery("establish_date").Gte(1704038400)
+	//query := elastic.NewBoolQuery().
+	//	Must(areaTermsQuery).
+	//	Must(rangeQuery)
+	//---------------------------//
+	query := elastic.NewBoolQuery()
+	query.Must(elastic.NewMatchQuery("business_scope", "招投标代理"))
+	query.Must(elastic.NewTermQuery("company_city", "北京市"))
 
 	//rangeQuery := elastic.NewRangeQuery("comeintime").Gte("1640966400").Lt("1703952000")
 	//query := elastic.NewBoolQuery().
@@ -419,10 +427,19 @@ func getQyxytData() {
 				continue
 			}
 
-			//存入新表
-			err = MgoB.InsertOrUpdate("qfw", "wcc_2024_pingdingshan", doc)
-			if err != nil {
-				log.Println("error", doc["id"])
+			if strings.Contains(util.ObjToString(doc["business_scope"]), "招投标代理") {
+				//存入新表
+				insert := map[string]interface{}{
+					"company_name":   doc["company_name"],
+					"business_scope": doc["business_scope"],
+					"employee_name":  doc["employee_name"],
+					"company_phone":  doc["company_phone"],
+				}
+				err = MgoB.InsertOrUpdate("qfw", "wcc_2024_beijing_dailijigou", insert)
+				if err != nil {
+					log.Println("error", doc["id"])
+				}
+
 			}
 
 			//sWinner := util.ObjToString(doc["s_winner"])
@@ -536,6 +553,63 @@ func getQyLimitData() {
 
 }
 
+// getTidb enriches the agency list stored in MongoDB with identifiers from
+// TiDB.
+//
+// It iterates qfw.wcc_2024_beijing_dailijigou, looks each company_name up
+// through the EnterpriseBaseInfo model and, when a row is found, saves
+// company_name, name_id and business_scope to qfw.wcc_beijing_daili_bidding.
+// Lookup and write failures are logged and the loop continues.
+func getTidb() {
+	// NOTE(review): connection credentials are hard-coded below; consider
+	// loading them from configuration.
+	MgoB := &mongodb.MongodbSim{
+		MongodbAddr: "172.17.189.140:27080",
+		//MongodbAddr: "127.0.0.1:27083",
+		Size:     10,
+		DbName:   "qfw",
+		UserName: "SJZY_RWbid_ES",
+		Password: "SJZY@B4i4D5e6S",
+		//Direct:   true,
+	}
+	MgoB.InitPool()
+
+	//tidb
+	username := "datascbi"
+	password := "Da#Bi20221111SC"
+	//host := "127.0.0.1:4001"
+	host := "172.17.162.25:4000"
+	database := "global_common_data"
+	dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", username, password, host, database)
+	// Connect to the database.
+	db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{})
+	if err != nil {
+		log.Println("Failed to connect to database:", err)
+		return
+	}
+
+	fmt.Println("Connected to the database!")
+	defer util.Catch()
+	sess := MgoB.GetMgoConn()
+	defer MgoB.DestoryMongoConn(sess)
+
+	it := sess.DB("qfw").C("wcc_2024_beijing_dailijigou").Find(nil).Select(nil).Iter()
+	fmt.Println("taskRun 开始")
+	count := 0
+	for tmp := make(map[string]interface{}); it.Next(&tmp); count++ {
+		if count%10000 == 0 {
+			log.Println("current:", count)
+		}
+
+		companyName := util.ObjToString(tmp["company_name"])
+		var baseInfo EnterpriseBaseInfo
+		if err := db.Where(&EnterpriseBaseInfo{Name: companyName}).First(&baseInfo).Error; err != nil {
+			// No row for this company (or the query failed): nothing to save.
+			continue
+		}
+		if baseInfo.ID > 0 {
+			insert := map[string]interface{}{
+				"company_name":   companyName,
+				"name_id":        baseInfo.NameID,
+				"business_scope": tmp["business_scope"],
+			}
+			if err := MgoB.InsertOrUpdate("qfw", "wcc_beijing_daili_bidding", insert); err != nil {
+				log.Println("error", companyName, err)
+			}
+		}
+	}
+
+	log.Println("over")
+
+}
+
 // IsInStringArray 判断数组中是否存在字符串
 func IsInStringArray(str string, arr []string) bool {
 	// 先对字符串数组进行排序

+ 189 - 47
updateBidding/main.go

@@ -1,13 +1,18 @@
 package main
 
 import (
+	"context"
+	"encoding/json"
 	"fmt"
+	es7 "github.com/olivere/elastic/v7"
 	"github.com/wcc4869/common_utils/log"
 	"go.uber.org/zap"
+	"io"
 	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
 	"reflect"
+	"strings"
 	"time"
 )
 
@@ -110,7 +115,7 @@ func Init() {
 
 func main() {
 	Init()
-	InitEsBiddingField()
+	//InitEsBiddingField()
 	go updateMethod()   //更新mongodb
 	go updateEsMethod() //更新es
 	//go updateProjectEsMethod()
@@ -453,11 +458,22 @@ func dealBidding() {
 	sess := MgoB.GetMgoConn()
 	defer MgoB.DestoryMongoConn(sess)
 
+	where := map[string]interface{}{
+		"comeintime": map[string]interface{}{
+			"$lt": 1722418770,
+			//"$lt":  1718812802,
+			"$gte": 1722009600,
+		},
+	}
+
 	//where := map[string]interface{}{
-	//	"title": "2020年12月采购意向项目-3",
+	//	"_id": map[string]interface{}{
+	//		"$gte": mongodb.StringTOBsonId("66aa067e66cf0db42a8ea71e"),
+	//		"$lt":  mongodb.StringTOBsonId("66aa067e66cf0db42a8ea720"),
+	//	},
 	//}
 
-	it := sess.DB("qfw").C("bidding").Find(nil).Select(nil).Iter()
+	it := sess.DB("qfw").C("bidding").Find(where).Select(nil).Iter()
 
 	fmt.Println("taskRun 开始")
 	count := 0
@@ -490,57 +506,92 @@ func dealBidding() {
 		  ],
 		*/
 		// 行业分类默认值
-		if topscopeclass, ok := tmp["topscopeclass"]; !ok && topscopeclass == nil {
-			update["topscopeclass"] = []string{"其它"}
-			update["s_topscopeclass"] = "其它"
-
-			esUpdate["topscopeclass"] = []string{"其它"}
-			esUpdate["s_topscopeclass"] = "其它"
-		}
+		resultSubs := make([]string, 0)
+		resultTobs := make([]string, 0)
+		if topscopeclass, ok := tmp["topscopeclass"]; ok && topscopeclass != nil {
+			if topps, ok2 := topscopeclass.([]interface{}); ok2 {
+				for _, v := range topps {
+					top := util.ObjToString(v)
+					if top != "" {
+						resultTobs = append(resultTobs, top)
+					}
+				}
+			}
+			//1.一级分类是空数组或者 是 其它
+			if len(resultTobs) == 0 || resultTobs[0] == "其它" {
+				update["topscopeclass"] = []string{"其它"}
+				update["subscopeclass"] = []string{"其它"}
+				update["s_topscopeclass"] = "其它"
+				update["s_subscopeclass"] = "其它"
+				esUpdate["s_topscopeclass"] = "其它"
+				esUpdate["s_subscopeclass"] = "其它"
+				esUpdate["topscopeclass"] = []string{"其它"}
+			} else {
+				if subs, ok3 := tmp["subscopeclass"]; ok3 {
+					if subbs, ok4 := subs.([]interface{}); ok4 {
+						for _, v := range subbs {
+							sub := util.ObjToString(v)
+							if sub != "" && sub != "其它" {
+								resultSubs = append(resultSubs, sub)
+							}
+						}
+					}
+				}
+				newTops, newSubs, cleanedTops := ProcessTopscopeclass(resultTobs, resultSubs)
+				update["topscopeclass"] = newTops
+				update["subscopeclass"] = newSubs
+				update["s_topscopeclass"] = strings.Join(cleanedTops, ",")
+				update["s_subscopeclass"] = strings.Join(newSubs, ",")
+				esUpdate["s_topscopeclass"] = strings.Join(cleanedTops, ",")
+				esUpdate["s_subscopeclass"] = strings.Join(newSubs, ",")
+				esUpdate["topscopeclass"] = newTops
+			}
 
-		if subscopeclass, ok := tmp["subscopeclass"]; !ok && subscopeclass == nil {
+		} else {
+			update["topscopeclass"] = []string{"其它"}
 			update["subscopeclass"] = []string{"其它"}
+			update["s_topscopeclass"] = "其它"
 			update["s_subscopeclass"] = "其它"
-
-			esUpdate["subscopeclass"] = []string{"其它"}
+			esUpdate["s_topscopeclass"] = "其它"
 			esUpdate["s_subscopeclass"] = "其它"
+			esUpdate["topscopeclass"] = []string{"其它"}
 		}
 
 		//procurementlist 处理预计采购时间
-		if procurementlist, ok := tmp["procurementlist"]; ok && procurementlist != nil {
-			field := "procurementlist"
-			if tmp[field] != nil {
-				if field == "procurementlist" {
-					if tmp["procurementlist"] != nil {
-						var arr []interface{}
-						plist := tmp["procurementlist"].([]interface{})
-						for _, p := range plist {
-							p1 := p.(map[string]interface{})
-							p2 := make(map[string]interface{})
-							for k, v := range BiddingLevelField[field] {
-								if k == "projectname" && util.ObjToString(p1[k]) == "" {
-									p2[k] = util.ObjToString(tmp["projectname"])
-								} else if k == "buyer" && util.ObjToString(p1[k]) == "" && util.ObjToString(tmp["buyer"]) != "" {
-									p2[k] = util.ObjToString(tmp["buyer"])
-								} else if k == "expurasingtime" && util.ObjToString(p1[k]) != "" {
-									res := getMethod(util.ObjToString(p1[k]))
-									if res != 0 {
-										p2[k] = res
-									}
-								} else if p1[k] != nil && reflect.TypeOf(p1[k]).String() == v {
-									p2[k] = p1[k]
-								}
-
-							}
-							arr = append(arr, p2)
-						}
-						if len(arr) > 0 {
-							esUpdate[field] = arr
-						}
-					}
-				}
-			}
-		}
+		//if procurementlist, ok := tmp["procurementlist"]; ok && procurementlist != nil {
+		//	field := "procurementlist"
+		//	if tmp[field] != nil {
+		//		if field == "procurementlist" {
+		//			if tmp["procurementlist"] != nil {
+		//				var arr []interface{}
+		//				plist := tmp["procurementlist"].([]interface{})
+		//				for _, p := range plist {
+		//					p1 := p.(map[string]interface{})
+		//					p2 := make(map[string]interface{})
+		//					for k, v := range BiddingLevelField[field] {
+		//						if k == "projectname" && util.ObjToString(p1[k]) == "" {
+		//							p2[k] = util.ObjToString(tmp["projectname"])
+		//						} else if k == "buyer" && util.ObjToString(p1[k]) == "" && util.ObjToString(tmp["buyer"]) != "" {
+		//							p2[k] = util.ObjToString(tmp["buyer"])
+		//						} else if k == "expurasingtime" && util.ObjToString(p1[k]) != "" {
+		//							res := getMethod(util.ObjToString(p1[k]))
+		//							if res != 0 {
+		//								p2[k] = res
+		//							}
+		//						} else if p1[k] != nil && reflect.TypeOf(p1[k]).String() == v {
+		//							p2[k] = p1[k]
+		//						}
+		//
+		//					}
+		//					arr = append(arr, p2)
+		//				}
+		//				if len(arr) > 0 {
+		//					esUpdate[field] = arr
+		//				}
+		//			}
+		//		}
+		//	}
+		//}
 
 		if len(update) > 0 {
 			//fmt.Println("aaaaa", biddingID)
@@ -577,6 +628,97 @@ func dealBidding() {
 	log.Info("Run Over...Count:", log.Int("count", count))
 }
 
+// dealBiddingByEs scrolls through the "bidding" index and decodes every
+// document whose comeintime is greater than 1718812800 and that has no
+// s_topscopeclass field. Per-document handling is currently disabled (see the
+// commented-out block in the loop), so the function only counts what it reads
+// and prints progress to stdout.
+//
+// NOTE(review): the Elasticsearch URL and credentials are hard-coded below;
+// consider loading them from configuration instead of keeping them in source.
+func dealBiddingByEs() {
+	//url := "http://172.17.4.184:19908"
+	url := "http://127.0.0.1:19908"
+	username := "jybid"
+	password := "Top2023_JEB01i@31"
+	index := "bidding" // index name
+	//index := "projectset" // index name
+
+	// Create the Elasticsearch client.
+	client, err := es7.NewClient(
+		es7.SetURL(url),
+		es7.SetBasicAuth(username, password),
+		es7.SetSniff(false),
+	)
+	if err != nil {
+		log.Fatalf("创建 Elasticsearch 客户端失败:%s", err)
+	}
+
+	query := es7.NewBoolQuery()
+	query.Must(es7.NewRangeQuery("comeintime").Gt(1718812800))
+	query.MustNot(es7.NewExistsQuery("s_topscopeclass"))
+
+	ctx := context.Background()
+	// Start the scroll search.
+	scrollID := ""
+	scroll := "10m"
+	searchSource := es7.NewSearchSource().
+		Query(query).
+		Size(10000).
+		Sort("_doc", true) // ascending
+	//Sort("_doc", false) // descending
+
+	searchService := client.Scroll(index).
+		Size(10000).
+		Scroll(scroll).
+		SearchSource(searchSource)
+
+	res, err := searchService.Do(ctx)
+	if err != nil && err != io.EOF {
+		panic(err)
+	}
+	if err == io.EOF {
+		fmt.Println("没有数据")
+	}
+
+	// On io.EOF the result may be nil or carry no hits, so it is checked
+	// before being dereferenced. The scroll ID is captured immediately so the
+	// scroll context is cleared even when the first page is empty.
+	var totalHits int64
+	if res != nil {
+		scrollID = res.ScrollId
+		totalHits = res.TotalHits()
+	}
+	fmt.Println("总数是:", totalHits)
+
+	total := 0
+	for res != nil && res.Hits != nil && len(res.Hits.Hits) > 0 {
+		for _, hit := range res.Hits.Hits {
+			var doc map[string]interface{}
+			err := json.Unmarshal(hit.Source, &doc)
+			if err != nil {
+				fmt.Printf("解析文档失败:%s", err)
+				continue
+			}
+			//delete(doc, "filetext")
+			//delete(doc, "detail")
+			//
+			//// store into the new collection
+			//err = MgoB.InsertOrUpdate("qfw", "wcc_subtype_err_0429", doc)
+			//if err != nil {
+			//	fmt.Println("error", doc["id"])
+			//}
+
+		}
+
+		total = total + len(res.Hits.Hits)
+		scrollID = res.ScrollId
+		res, err = client.Scroll().ScrollId(scrollID).Scroll(scroll).Do(ctx)
+		fmt.Println("current count:", total)
+		if err != nil {
+			if err == io.EOF {
+				// Last batch consumed; leave the loop.
+				break
+			}
+			fmt.Println("滚动搜索失败:", err, res)
+			break // stop on any other error
+		}
+	}
+
+	// Release the server-side scroll context, if one was ever opened.
+	if scrollID != "" {
+		_, err = client.ClearScroll().ScrollId(scrollID).Do(ctx)
+		if err != nil {
+			fmt.Printf("清理滚动搜索失败:%s", err)
+		}
+	}
+
+	fmt.Println("结束~~~~~~~~~~~~~~~")
+}
+
 // dealBiddingTest 处理测试环境数据
 func dealBiddingTest() {
 	defer util.Catch()

+ 46 - 0
updateBidding/util.go

@@ -4,7 +4,9 @@ import (
 	"fmt"
 	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"regexp"
+	"sort"
 	"strconv"
+	"strings"
 	"unicode"
 	"unicode/utf8"
 )
@@ -189,3 +191,47 @@ func getMethod(str string) int64 {
 
 	return 0
 }
+
+// ProcessTopscopeclass normalizes industry classification labels.
+//
+// Each element of tops has its final character (rune) removed; the distinct
+// results, in first-seen order, form cleanedTops. For every cleaned top that
+// is not a substring of any element of subs, "<top>_其它" is appended to subs.
+//
+// It returns tops unchanged, the extended subs, and cleanedTops. Empty strings
+// in tops are skipped because they have no trailing character to remove.
+func ProcessTopscopeclass(tops, subs []string) ([]string, []string, []string) {
+	cleanedTops := make([]string, 0, len(tops))
+	seen := make(map[string]struct{}, len(tops))
+	for _, top := range tops {
+		if top == "" {
+			continue
+		}
+		// Drop the last rune rather than the last byte so a multi-byte
+		// character is removed whole.
+		_, size := utf8.DecodeLastRuneInString(top)
+		cleanedTop := top[:len(top)-size]
+		if _, dup := seen[cleanedTop]; dup {
+			continue
+		}
+		seen[cleanedTop] = struct{}{}
+		cleanedTops = append(cleanedTops, cleanedTop)
+	}
+
+	// Record which cleaned tops already occur inside some sub label.
+	presentMap := make(map[string]bool, len(cleanedTops))
+	for _, sub := range subs {
+		for _, top := range cleanedTops {
+			if strings.Contains(sub, top) {
+				presentMap[top] = true
+			}
+		}
+	}
+
+	// Add a catch-all sub label for every cleaned top that has none.
+	for _, top := range cleanedTops {
+		if !presentMap[top] {
+			subs = append(subs, top+"_其它")
+		}
+	}
+
+	return tops, subs, cleanedTops
+}
+
+// IsInStringArray reports whether str is an element of arr.
+//
+// The lookup sorts a private copy of arr and binary-searches it, so the
+// caller's slice keeps its original order.
+func IsInStringArray(str string, arr []string) bool {
+	sorted := make([]string, len(arr))
+	copy(sorted, arr)
+	sort.Strings(sorted)
+	pos := sort.SearchStrings(sorted, str)
+	return pos < len(sorted) && sorted[pos] == str
+}

BIN
xlsx/20240725导出数据.xlsx


+ 1 - 0
xlsx/highBidding.go

@@ -8,6 +8,7 @@ import (
 	"log"
 )
 
+// HighBidding 匹配高质量库数据
 func HighBidding() {
 	f, err := excelize.OpenFile("./售后组测试企业_20240201_1683.xlsx")
 	if err != nil {

+ 117 - 20
xlsx/main.go

@@ -15,6 +15,7 @@ import (
 	"log"
 	"net/url"
 	"os"
+	"regexp"
 	"sort"
 	"strings"
 )
@@ -32,7 +33,7 @@ func main() {
 	//fmt.Println(11111)
 	//HighBidding()
 	//exportQy()
-	exportPhone2() // 导出联系电话
+	//exportPhone3() // 导出联系电话
 	//clickhouseData()
 	//getName()
 	log.Println("over")
@@ -142,7 +143,7 @@ func exportWinner() {
 	fmt.Println("数据已成功导出到 exported_data.xlsx")
 }
 
-// exportPhone 根据企业名单,导出联系人电话
+// exportPhone 根据企业名单,导出联系人电话;凭安和标讯抽取到的
 func exportPhone() {
 	Mgo := &mongodb.MongodbSim{
 		//MongodbAddr: "172.17.189.140:27080",
@@ -160,7 +161,7 @@ func exportPhone() {
 	username := "datascbi"
 	password := "Da#Bi20221111SC"
 	host := "127.0.0.1:4001"
-	//host := "172.17.4.242:4000"
+	//host := "172.17.162.25:4000"
 	database := "global_common_data"
 	dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", username, password, host, database)
 	// 连接到数据库
@@ -261,23 +262,10 @@ func exportPhone() {
 
 // exportPhone 根据企业名单,导出联系人电话,只要抽取到的数据,联系人,联系电话 只要一个
 func exportPhone2() {
-	Mgo := &mongodb.MongodbSim{
-		//MongodbAddr: "172.17.189.140:27080",
-		MongodbAddr: "127.0.0.1:27083",
-		Size:        10,
-		DbName:      "mixdata",
-		UserName:    "SJZY_RWbid_ES",
-		Password:    "SJZY@B4i4D5e6S",
-		Direct:      true,
-	}
-	Mgo.InitPool()
-
-	//
-
 	username := "datascbi"
 	password := "Da#Bi20221111SC"
 	host := "127.0.0.1:4001"
-	//host := "172.17.4.242:4000"
+	//host := "172.17.162.25:4000"
 	database := "global_common_data"
 	dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", username, password, host, database)
 	// 连接到数据库
@@ -287,7 +275,7 @@ func exportPhone2() {
 		return
 	}
 
-	f, err := excelize.OpenFile("./河南省物业企业名单.xlsx")
+	f, err := excelize.OpenFile("./北京代理机构.xlsx")
 	if err != nil {
 		fmt.Println(err)
 		return
@@ -312,7 +300,7 @@ func exportPhone2() {
 		ContactTel  string `json:"contact_tel"`
 	}
 	for i := 1; i < len(rows); i++ {
-		name := rows[i][1]
+		name := rows[i][0]
 		if !strings.Contains(name, "公司") {
 			continue
 		}
@@ -349,12 +337,15 @@ func exportPhone2() {
 		db.Table("dws_f_ent_baseinfo").Select("name", "name_id").Where("name = ? ", name).Scan(&baseinfo)
 		if baseinfo.NameId != "" {
 			contacts := []Contact{}
-			db.Table("dws_f_ent_contact").Select("contact_name", "contact_tel").Where("name_id = ? ", baseinfo.NameId).Scan(&contacts)
+			db.Table("dws_f_ent_contact").Select("contact_name", "contact_tel").Where("name_id = ? ", baseinfo.NameId).Order("publishtime desc").Scan(&contacts)
 			if len(contacts) > 0 {
 				for _, v := range contacts {
 					if strings.Contains(v.ContactTel, ">") {
 						continue
 					}
+					if !validateMobileNumber(v.ContactTel) {
+						continue
+					}
 					if v.ContactName != "" && v.ContactTel != "" {
 						f.SetCellValue("Sheet1", fmt.Sprintf("D%v", i+1), v.ContactName)
 						f.SetCellValue("Sheet1", fmt.Sprintf("E%v", i+1), v.ContactTel)
@@ -363,7 +354,107 @@ func exportPhone2() {
 				}
 			}
 		}
+	}
+
+	f.Save()
+}
+
+// exportPhone3 根据企业名单导出联系人,联系电话,只要抽取到的
+func exportPhone3() {
+	username := "datascbi"
+	password := "Da#Bi20221111SC"
+	host := "127.0.0.1:4001"
+	//host := "172.17.162.25:4000"
+	database := "global_common_data"
+	dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", username, password, host, database)
+	// 连接到数据库
+	db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{})
+	if err != nil {
+		log.Println("Failed to connect to database:", err)
+		return
+	}
+
+	f, err := excelize.OpenFile("./北京代理机构.xlsx")
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+	defer func() {
+		if err := f.Close(); err != nil {
+			fmt.Println(err)
+		}
+	}()
+
+	rows, err := f.GetRows("Sheet1")
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+	type BaseInfo struct {
+		Name   string `json:"name"`
+		NameId string `json:"name_id"`
+	}
+	type Contact struct {
+		ContactName string `json:"contact_name"`
+		ContactTel  string `json:"contact_tel"`
+	}
+	line := 1
+	for i := 1; i < len(rows); i++ {
+		name := rows[i][0]
+		if !strings.Contains(name, "公司") {
+			continue
+		}
+		log.Println(i, "----", name)
+		//contactsMap := make([]string, 0)
+
+		//stds, _ := Mgo.FindOne("qyxy_std", map[string]interface{}{"company_name": name})
+		//var reportsMap = make([]string, 0)
+		//if reports, ok := (*stds)["annual_reports"]; ok {
+		//	if rs, ok := reports.([]interface{}); ok {
+		//		for _, v := range rs {
+		//			if da, ok := v.(map[string]interface{}); ok {
+		//				if util.ObjToString(da["operator_name"]) != "" && util.ObjToString(da["company_phone"]) != "" {
+		//					tm := util.ObjToString(da["operator_name"]) + "_" + util.ObjToString(da["company_phone"])
+		//					if !IsInStringArray(tm, reportsMap) {
+		//						reportsMap = append(reportsMap, tm)
+		//					}
+		//				} else if util.ObjToString(da["company_phone"]) != "" {
+		//					if !IsInStringArray(util.ObjToString(da["company_phone"]), reportsMap) {
+		//						reportsMap = append(reportsMap, util.ObjToString(da["company_phone"]))
+		//					}
+		//				}
+		//			}
+		//
+		//		}
+		//	}
+		//}
+		//if len(reportsMap) > 0 {
+		//	res := strings.Join(reportsMap, ",")
+		//	f.SetCellValue("Sheet1", fmt.Sprintf("E%v", i+1), res)
+		//}
 
+		baseinfo := BaseInfo{}
+		db.Table("dws_f_ent_baseinfo").Select("name", "name_id").Where("name = ? ", name).Scan(&baseinfo)
+		if baseinfo.NameId != "" {
+			contacts := []Contact{}
+			db.Table("dws_f_ent_contact").Select("contact_name", "contact_tel").Where("name_id = ? ", baseinfo.NameId).Order("publishtime desc").Scan(&contacts)
+			if len(contacts) > 0 {
+				for _, v := range contacts {
+					if strings.Contains(v.ContactTel, ">") {
+						continue
+					}
+					if !validateMobileNumber(v.ContactTel) {
+						continue
+					}
+					if v.ContactName != "" && v.ContactTel != "" {
+						f.SetCellValue("Sheet1", fmt.Sprintf("C%v", line+1), name)
+						f.SetCellValue("Sheet1", fmt.Sprintf("D%v", line+1), v.ContactName)
+						f.SetCellValue("Sheet1", fmt.Sprintf("E%v", line+1), v.ContactTel)
+						line++
+					}
+				}
+			}
+		}
 	}
 
 	f.Save()
@@ -434,3 +525,9 @@ func IsInStringArray(str string, arr []string) bool {
 	// 如果找到了则返回 true,否则返回 false
 	return pos < len(arr) && arr[pos] == str
 }
+
+func validateMobileNumber(mobileNumber string) bool {
+	// 手机号码正则表达式,这里只是一个简单的示例,可能需要根据您的具体需求进行调整
+	re := regexp.MustCompile(`^1[3-9]\d{9}$`)
+	return re.MatchString(mobileNumber)
+}

+ 2 - 2
xlsx/qyxy.go

@@ -30,7 +30,7 @@ func exportQy() {
 	username := "datascbi"
 	password := "Da#Bi20221111SC"
 	host := "127.0.0.1:4001"
-	//host := "172.17.4.242:4000"
+	//host := "172.17.162.25:4000"
 	database := "global_common_data"
 	dsn := fmt.Sprintf("%s:%s@tcp(%s)/%s?charset=utf8mb4&parseTime=True&loc=Local", username, password, host, database)
 	// 连接到数据库
@@ -88,7 +88,7 @@ func exportQy() {
 		contactsMap := make([]string, 0) //联系人信息
 		baseinfo := BaseInfo{}
 		log.Println(name)
-		db.Table("dws_f_ent_baseinfo").Select("name", "name_id").Where("name = ? ", name).Order("createtime desc").Limit(10).Scan(&baseinfo)
+		db.Table("dws_f_ent_baseinfo").Select("name", "name_id").Where("name = ? ", name).Order("publishtime desc").Limit(10).Scan(&baseinfo)
 		if baseinfo.NameId != "" {
 			contacts := []Contact{}
 			db.Table("dws_f_ent_contact").Select("contact_name", "contact_tel").Where("name_id = ? ", baseinfo.NameId).Scan(&contacts)

+ 111 - 0
xlsx/xlsx_test.go

@@ -3,10 +3,14 @@ package main
 import (
 	"fmt"
 	"github.com/xuri/excelize/v2"
+	util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
 	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
 	"log"
+	"os"
+	"strconv"
 	"strings"
 	"testing"
+	"time"
 )
 
 func TestA(T *testing.T) {
@@ -497,3 +501,110 @@ func TestHN(t *testing.T) {
 	f.Save()
 	log.Println("结束")
 }
+
+// TestExportBidding exports documents from the qfw.rc_rule collection that
+// match a fixed s_classid / s_name filter into "<yyyymmdd>导出数据.xlsx" in the
+// current working directory, one row per document with the columns listed in
+// subtitles. It fails the test when the working directory cannot be resolved
+// or the workbook cannot be saved.
+//
+// NOTE(review): the MongoDB address and credentials are hard-coded below.
+func TestExportBidding(t *testing.T) {
+	Mgo := &mongodb.MongodbSim{
+		//MongodbAddr: "172.17.189.140:27080",
+		MongodbAddr: "127.0.0.1:27083",
+		Size:        10,
+		DbName:      "qfw",
+		UserName:    "SJZY_RWbid_ES",
+		Password:    "SJZY@B4i4D5e6S",
+		Direct:      true,
+	}
+	Mgo.InitPool()
+
+	sess := Mgo.GetMgoConn()
+	defer Mgo.DestoryMongoConn(sess)
+
+	//start, _ := time.Parse("2006-01-02 15:04:05", "2023-09-06 21:00:00")
+	//end, _ := time.Parse("2006-01-02 15:04:05", "2023-09-07 09:00:00")
+	//////
+	//q := map[string]interface{}{
+	//	"publish": map[string]interface{}{
+	//		"$gte": 1701360000,
+	//		"$lte": 1706716800,
+	//	},
+	//	//"modifyinfo.toptype": map[string]interface{}{
+	//	//	"$exists": 1,
+	//	//},
+	//	"toptype": "拟建",
+	//	//"title": map[string]interface{}{
+	//	//	"$regex": "充电",
+	//	//},
+	//}
+
+	q := map[string]interface{}{
+		"s_classid": "57a189cbd368081d70e185cf",
+		"s_name": map[string]interface{}{
+			"$in": []string{"废标", "流标", "结果变更", "中标", "成交", "中选", "其它"},
+		},
+	}
+
+	query := sess.DB("qfw").C("rc_rule").Find(q).Select(nil).Iter()
+
+	count := 0
+	file := time.Now().Format("20060102") + "导出数据.xlsx"
+	currentPwd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("resolve working directory: %v", err)
+	}
+	exportFile := fmt.Sprintf("%s/%s", currentPwd, file)
+	xlsx := excelize.NewFile(excelize.Options{ShortDatePattern: "yyyy/m/dd"})
+	// A style error is ignored on purpose: styling is cosmetic and the export
+	// is still useful without it.
+	// NOTE(review): "left" is not among the vertical alignment values listed
+	// in the excelize docs; confirm the intended value.
+	styleOne, _ := xlsx.NewStyle(
+		&excelize.Style{
+			Alignment: &excelize.Alignment{
+				Horizontal: "left",
+				Vertical:   "left",
+			},
+		},
+	)
+	line := 0
+	sheet := "Sheet1"
+
+	subtitles := []string{"s_name", "s_rule"}
+	//subtitles := []string{"bidding_id", "title", "detail", "href", "jyhref", "toptype", "new_toptype", "new_subtype"}
+	line++
+	// Write the header row.
+	_ = xlsx.SetSheetRow(sheet, fmt.Sprintf("%s%d", "A", line), &subtitles)
+	for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
+		if count%1000 == 0 {
+			log.Println("current --- ", count)
+		}
+
+		//company_phone := util.ObjToString(tmp["company_phone"])
+		//if len(company_phone) != 11 {
+		//	continue
+		//}
+		//if !validateMobileNumber(company_phone) {
+		//	continue
+		//}
+		//if util.ObjToString(tmp["employee_name"]) == "" {
+		//	continue
+		//}
+
+		//id := mongodb.BsonIdToSId(tmp["_id"])
+		line++
+		val := []interface{}{}
+		for _, v := range subtitles {
+			if v == "employee_name" {
+				// Keep only the first name of a comma-separated list.
+				val = append(val, strings.Split(util.ObjToString(tmp["employee_name"]), ",")[0])
+			} else {
+				val = append(val, tmp[v])
+			}
+
+		}
+		err := xlsx.SetSheetRow(sheet, fmt.Sprintf("%s%d", "A", line), &val)
+		if err != nil {
+			log.Println(err)
+			return
+		}
+		_ = xlsx.SetCellStyle(sheet, fmt.Sprintf("%s%d", "A", line), "BA"+strconv.Itoa(line), styleOne)
+
+		// Use a fresh map for the next document so fields from this one do
+		// not carry over.
+		tmp = make(map[string]interface{})
+
+	}
+	xlsx.Path = exportFile
+	if err := xlsx.Save(); err != nil {
+		t.Fatalf("save %s: %v", exportFile, err)
+	}
+
+	log.Println("dealTmp over ", count)
+
+}

BIN
xlsx/北京代理机构.xlsx


+ 3 - 3
yuqing/config.toml

@@ -7,7 +7,7 @@
 #    size = 15
 
 #    host = "baibai.ink:28088"
-    host = "127.0.0.1:27081"
+    host = "127.0.0.1:27081" ## 87
     db = "hp_news"
     username = ""
     password = ""
@@ -45,9 +45,9 @@
     end = 0
     shour = 12   ## 开始时间小时
     ehour = 12  ## 结束时间小时
-    coll1 = "CorporatePublicOpinion" ##
+    coll1 = "CorporatePublicOpinion" ## 官网数据
     coll2 = "news_detail" ## 百度的数据
-    collb = "bidding_yq"
+    collb = "bidding_yq" ## lua 数据
     keyword = "news_Keywords"
     specials = ["μ", "#NAME?", "¥", "Δ", "æ", "¡", "ζ", "Ψ", "Φ", "ώ", "≈","ж","Զ","ń","α","޹","й","ë","л",
         "Λ", "Θ", "ß", "±", "Ύ", "φ", "¬", "Й", "Щ"]

+ 1 - 1
yuqing/main.go

@@ -197,7 +197,7 @@ func exportFiles() {
 		"contenthtml": 0}).Iter()
 	count := 0
 
-	//1.读取所有infourl
+	//1.读取所有 已经保存交付的infourl
 	for tmp := make(map[string]interface{}); query.Next(tmp); count++ {
 		infourl := util.ObjToString(tmp["infourl"])
 		if infourl != "" {

+ 7 - 1
yuqing/readme.txt

@@ -1,4 +1,10 @@
 
     本程序 主要是为了处理 惠普舆情数据,主要功能如下:
         1.导出每天采集到的数据
-        2.导入每天交付的数据到 hp_news 数据表
+        2.导入每天交付的数据到 hp_news 数据表
+
+
+ 1、先从 166 jichunling.hp_news 已交付数据,从87 CorporatePublicOpinion 官方数据,news_detail百度数据 infourl
+ bidding_yq 官网-lua 数据 href ,存放数据进行过滤,导出数据对比去重。
+ 2、
+