浏览代码

更新惠普 标签数据规则

wcc 1 年之前
父节点
当前提交
132fc7c727
共有 6 个文件被更改,包括 122 次插入11 次删除
  1. 二进制
      export_employee/L2.xlsx
  2. 104 0
      export_employee/l2.go
  3. 10 8
      hp_news_tag/main.go
  4. 5 0
      hp_news_tag/readme.txt
  5. 1 1
      yuqing/config.toml
  6. 2 2
      yuqing/main.go

二进制
export_employee/L2.xlsx


+ 104 - 0
export_employee/l2.go

@@ -0,0 +1,104 @@
+package main
+
+import (
+	"fmt"
+	"github.com/xuri/excelize/v2"
+	"jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
+	"log"
+)
+
+func dealL2() {
+	f, err := excelize.OpenFile("L2.xlsx")
+	if err != nil {
+		fmt.Println(err)
+		return
+	}
+	defer func() {
+		f.Save()
+		if err := f.Close(); err != nil {
+			fmt.Println(err)
+		}
+	}()
+	rows, err := f.Rows("Sheet1")
+
+	mgo := &mongodb.MongodbSim{
+		MongodbAddr: "192.168.3.166:27082",
+		DbName:      "wcc",
+		Size:        10,
+		UserName:    "",
+		Password:    "",
+	}
+
+	mgo.InitPool()
+
+	//存储 l0 对应地下 的l2 数量总和
+	//resa := make(map[string]map[string]interface{}, 0)
+
+	i := 0
+	for rows.Next() {
+		i++
+		if i == 1 {
+			continue
+		}
+		if i%1000 == 0 {
+			log.Println(i)
+		}
+
+		row, err := rows.Columns()
+		if err != nil {
+			fmt.Println(err)
+		}
+		//1.没有参保人数、企业人数
+		where := map[string]interface{}{
+			"company_name": row[1],
+		}
+		var insert = make(map[string]interface{}, 0)
+		insert["l0"] = row[0]
+		insert["l2"] = row[1]
+		if len(row) == 2 {
+			data, _ := mgo.FindOne("L2data", where)
+			da := *data
+			if len(da) > 0 {
+				insert["insurance_amount"] = da["insurance_amount"]
+				insert["employee"] = da["employee"]
+			} else {
+				insert["insurance_amount"] = ""
+				insert["employee"] = ""
+			}
+		} else if len(row) == 3 {
+			data, _ := mgo.FindOne("L2data", where)
+			da := *data
+			if len(da) > 0 {
+				insert["insurance_amount"] = da["insurance_amount"]
+				insert["employee"] = da["employee"]
+			} else {
+				insert["insurance_amount"] = row[2]
+				insert["employee"] = ""
+			}
+
+		} else if len(row) == 4 {
+			if (row[2] != "" && row[2] != "0") && (row[3] != "" && row[3] != "0") {
+				insert["insurance_amount"] = row[2]
+				insert["employee"] = row[3]
+			} else {
+				data, _ := mgo.FindOne("L2data", where)
+				da := *data
+				if len(da) > 0 {
+					insert["insurance_amount"] = da["insurance_amount"]
+					insert["employee"] = da["employee"]
+				} else {
+					insert["insurance_amount"] = row[2]
+					insert["employee"] = row[3]
+				}
+			}
+		}
+
+		//
+		mgo.Save("wcc_l2", insert)
+	}
+	if err = rows.Close(); err != nil {
+		fmt.Println(err)
+	}
+
+	log.Println("over")
+}

文件差异内容过多而无法显示
+ 10 - 8
hp_news_tag/main.go


+ 5 - 0
hp_news_tag/readme.txt

@@ -2,3 +2,8 @@
 >|  惠普数据打标签
 
     主要是给 hp_news 数据表的数据打标签,label = 1|2|3
+
+
+
+1.标题排除词 - 2
+2.内容排除词 - 2

+ 1 - 1
yuqing/config.toml

@@ -34,7 +34,7 @@
 #    files = ["20230915惠普_舆情.xlsx"] ## 导入的文件名
     files = ["20230914惠普_舆情数据.xlsx","20230918惠普_舆情数据.xlsx","20230919惠普_舆情数据.xlsx","20230920惠普_舆情数据.xlsx","20230921惠普_舆情数据.xlsx","20230922惠普_舆情数据.xlsx","20230925惠普_舆情数据.xlsx","20230926惠普_舆情数据.xlsx","20230927惠普_舆情数据.xlsx","20230928惠普_舆情数据.xlsx","20231007惠普_舆情数据.xlsx","20231008惠普_舆情数据.xlsx","20231009惠普_舆情数据.xlsx","20231010惠普_舆情数据.xlsx","20231011惠普_舆情数据.xlsx","20231012惠普_舆情数据.xlsx","20231013惠普_舆情数据.xlsx","20231016惠普_舆情数据.xlsx","20231017惠普_舆情数据.xlsx","20231018惠普_舆情数据.xlsx","20231019惠普_舆情数据.xlsx","20231020惠普_舆情数据.xlsx","20231023惠普_舆情数据.xlsx","20231024惠普_舆情数据.xlsx","20231025惠普_舆情数据.xlsx","20231026惠普_舆情数据.xlsx","20231027惠普_舆情数据.xlsx","20231030惠普_舆情数据.xlsx","20231031惠普_舆情数据.xlsx","20231101惠普_舆情数据.xlsx","20231102惠普_舆情数据.xlsx","20231103惠普_舆情数据.xlsx","20231106惠普_舆情数据.xlsx","20231107惠普_舆情数据.xlsx"] ## 导入的文件名
 
-    start = -4   ## -1表示导出昨天数据
+    start = -3   ## -1表示导出昨天数据
     end = 0
     coll1 = "CorporatePublicOpinion"
     coll2 = "news_detail"

+ 2 - 2
yuqing/main.go

@@ -98,9 +98,9 @@ func main() {
 	InitConfig()
 	InitLog()
 	InitMgo()
-	readFile()
+	//readFile()
 
-	//exportFiles()
+	exportFiles()
 
 	select {}
 }

部分文件因为文件数量过多而无法显示