Explorar el Código

数据备份~领域相关~所有

zhengkun hace 2 años
padre
commit
c95b5d6000

+ 7 - 1
README.md

@@ -11,5 +11,11 @@
 
 ### fieldproject_medical(医疗数据处理模块) 
 + data_preparation (基础信息~数据准备~)
-  + 1、企业产品  2、医院整合  3、产品整合  4、修复代码表等
+  + 1、企业产品信息  
+  + 2、医院相关信息  
+  + 3、产品相关整合  
+  + 4、代码表
 
++ data_service (服务)
+  + 1、标的物相关信息  
+  + 2、相关服务

+ 2 - 2
fieldproject_medical/data_preparation/src/hospital/hospital.go

@@ -21,7 +21,7 @@ func RunHospital() {
 	RunCleanHospitalInfo()
 	RunResetHospitalInfo(save_hospital_coll, "name")
 	//增加索引
-	createMgoIndex(save_hospital_coll, []string{"repeat_id"})
+	createMgoIndex(save_hospital_coll, []string{"repeat_id", "name"})
 	RunMergeHospitalInfo(save_hospital_coll, merge_hospital_coll_1)
 	//增加索引
 	createMgoIndex(merge_hospital_coll_1, []string{"repeat_id", "name"})
@@ -31,7 +31,7 @@ func RunHospital() {
 	//根据最终的company_id 重置判重标记
 	RunResetHospitalInfo(merge_hospital_coll_2, "company_id")
 	//增加索引
-	createMgoIndex(merge_hospital_coll_2, []string{"company_id"})
+	createMgoIndex(merge_hospital_coll_2, []string{"repeat_id", "name"})
 	RunMergeHospitalInfo(merge_hospital_coll_2, merge_hospital_coll_3)
 
 }

+ 1 - 1
fieldproject_medical/data_preparation/src/hospital/hospital_sql.go

@@ -18,7 +18,7 @@ func ExportHospitalInfoToMysql() {
 	sess := class.Save_Mgo.GetMgoConn()
 	defer class.Save_Mgo.DestoryMongoConn(sess)
 	q := map[string]interface{}{}
-	it := sess.DB(class.Save_Mgo.DbName).C(merge_hospital_coll_2).Find(&q).Sort("_id").Iter()
+	it := sess.DB(class.Save_Mgo.DbName).C(merge_hospital_coll_3).Find(&q).Sort("_id").Iter()
 	pool := make(chan bool, 3)
 	wg := &sync.WaitGroup{}
 	total := 0

+ 0 - 3
fieldproject_medical/data_preparation/src/main.go

@@ -28,9 +28,6 @@ func main() {
 	//企业生产经营产品~信息
 	//company.RunCompanyProductInfo()
 
-	//修复数据
-	//repair.RepairUnClassProduct()
-
 	time.Sleep(999 * time.Hour)
 }
 

+ 49 - 27
fieldproject_medical/data_service/src/bidding/bidding.go

@@ -29,11 +29,11 @@ func RunPurchasingInfo(gtid string, lteid string) {
 		},
 	}
 	it := sess.DB(ul.Mgo.DbName).C(ul.S_Bidding_Coll).Find(&q).Sort("_id").Select(fields).Iter()
-	pool := make(chan bool, 6)
+	pool := make(chan bool, 8)
 	wg := &sync.WaitGroup{}
 	total, isok := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%10000 == 0 {
+		if total%1000 == 0 {
 			log.Debug("cur index ", total, "~", isok)
 		}
 		if qu.IntAll(tmp["extracttype"]) != 1 {
@@ -47,16 +47,15 @@ func RunPurchasingInfo(gtid string, lteid string) {
 				<-pool
 				wg.Done()
 			}()
-			//构建标的物基本信息
-			b := createBaseInfo(tmp)
+			b := createBaseInfo(tmp) //构建信息
 			if b {
 				numlock.Lock()
 				isok++
-				//ul.Mgo.UpdateById(ul.S_Bidding_Coll, class.BsonTOStringId(tmp["_id"]), map[string]interface{}{
-				//	"$set": map[string]interface{}{
-				//		"bid_field": "0101",
-				//	},
-				//})
+				ul.Mgo.UpdateById(ul.S_Bidding_Coll, class.BsonTOStringId(tmp["_id"]), map[string]interface{}{
+					"$set": map[string]interface{}{
+						"bid_field": "0101",
+					},
+				})
 				numlock.Unlock()
 			}
 		}(tmp)
@@ -67,19 +66,20 @@ func RunPurchasingInfo(gtid string, lteid string) {
 	log.Debug("is over ", total, "~", isok)
 }
 
-//构建标的物基本信息
+//构建标的物基本信息~模式一~规则型
 func createBaseInfo(tmp map[string]interface{}) bool {
 	p_list := IsMarkInterfaceMap(tmp["purchasinglist"])
+	new_plist := []map[string]interface{}{}
 	infoid := class.BsonTOStringId(tmp["_id"])
-	s_topscopeclass := qu.ObjToString(tmp["s_topscopeclass"])
-	s_subscopeclass := qu.ObjToString(tmp["s_subscopeclass"])
-	buyerclass := qu.ObjToString(tmp["buyerclass"])
-	buyer := qu.ObjToString(tmp["buyer"])
 	bid_topsubtype_code := confrimTopSubCode(qu.ObjToString(tmp["toptype"]), qu.ObjToString(tmp["subtype"]))
-	//是否符合领域范围
-	industry_code, isField := IsIndustryInfo(s_topscopeclass, s_subscopeclass, buyerclass, buyer)
+	industry_code := confrimIndustryCode(qu.ObjToString(tmp["s_topscopeclass"]), qu.ObjToString(tmp["s_subscopeclass"]))
+	isField := IsMedicalIndustryFieldInfo(tmp)
 	if isField {
-		new_plist := createNewPurchasingInfo(p_list)
+		_, new_plist = createNewPurchasingInfo(p_list)
+	} else { //其他规则~验证是否能~通过验证
+		isField, new_plist = createNewPurchasingInfo(p_list)
+	}
+	if isField { //最终根据是否为领域数据灌入数据
 		//标的物基本信息~记录标签
 		insertBaseInfo(new_plist, infoid, bid_topsubtype_code)
 		//招标信息领域标签
@@ -87,12 +87,16 @@ func createBaseInfo(tmp map[string]interface{}) bool {
 		//招标信息行业标签
 		insertIndustryTag(industry_code, infoid)
 	}
+
+	//阻止注释~报错
+	//if infoid == "" || bid_topsubtype_code == "" || industry_code == "" || len(new_plist) == 0 {}
 	return isField
 }
 
 //返回新的~标的物信息(分类+整合)
-func createNewPurchasingInfo(p_list []map[string]interface{}) []map[string]interface{} {
+func createNewPurchasingInfo(p_list []map[string]interface{}) (bool, []map[string]interface{}) {
 	new_plist := []map[string]interface{}{}
+	isExists := false
 	for _, v := range p_list {
 		//去重处理~名称~品牌~型号~暂无重复
 		data := map[string]interface{}{}
@@ -103,7 +107,9 @@ func createNewPurchasingInfo(p_list []map[string]interface{}) []map[string]inter
 		//根据标的物名字~打上具体的分类数据~
 		//重要重要重要
 		yl_code := confrimTargetMedicalClass(itemname)
-
+		if yl_code != "" {
+			isExists = true
+		}
 		data["medical_equipment_code"] = yl_code
 		data["itemname"] = itemname
 		data["brand"] = qu.ObjToString(v["brandname"])
@@ -134,27 +140,43 @@ func createNewPurchasingInfo(p_list []map[string]interface{}) []map[string]inter
 		}
 		new_plist = append(new_plist, data)
 	}
-	return new_plist
+	return isExists, new_plist
 }
 
 //是否为医疗行业数据
-func IsIndustryInfo(top_class string, sub_class string, buyer_class string, buyer string) (string, bool) {
-	code := ""
-	//两组条件判断是否为医疗领域数据~
+func IsMedicalIndustryFieldInfo(tmp map[string]interface{}) bool { // IsMedicalIndustryFieldInfo reports whether the record tmp passes the medical-field rule check.
+	top_class := qu.ObjToString(tmp["s_topscopeclass"]) // the four inputs below are read from tmp and converted with qu.ObjToString
+	sub_class := qu.ObjToString(tmp["s_subscopeclass"])
+	buyer_class := qu.ObjToString(tmp["buyerclass"])
+	buyer := qu.ObjToString(tmp["buyer"])
+	// First rule set; it is the only check evaluated here, so its result is the return value.
+	if isFieldInfoMethodFirst(top_class, sub_class, buyer_class, buyer) {
+		return true
+	}
+	return false
+}
+
+// isFieldInfoMethodFirst reports whether the first rule set passes: condition A (scope classes) and condition B (buyer) must both hold.
+func isFieldInfoMethodFirst(top_class string, sub_class string, buyer_class string, buyer string) bool {
 	is_A, is_B := false, false
 	if A_FieldReg.MatchString(sub_class) || (sub_class == "" && strings.Contains(top_class, "医疗卫生")) { // A: sub_class matches A_FieldReg, or sub_class is empty and top_class contains the literal
 		is_A = true
 	}
 	if !is_A {
-		return "", false
+		return false
 	}
 	if B_FieldReg.MatchString(buyer_class) || (buyer_class == "制造业" && strings.Contains(buyer, "医疗器械")) { // B: buyer_class matches B_FieldReg, or it equals the literal and buyer contains the second literal
 		is_B = true
 	}
 	if !is_B {
-		return "", false
+		return false
 	}
-	// concatenate the code
+	return is_A && is_B // both flags are already true here; the early returns above cover every false case
+}
+
+//确认行业code
+func confrimIndustryCode(top_class string, sub_class string) string {
+	code := ""
 	top_arr := strings.Split(top_class, ",")
 	for _, v := range top_arr {
 		vc := ul.Bid_Industry[v]["code"]
@@ -176,7 +198,7 @@ func IsIndustryInfo(top_class string, sub_class string, buyer_class string, buye
 			code += vc
 		}
 	}
-	return code, true
+	return code
 }
 
 //根据标的物名称~打医疗分类

+ 25 - 0
fieldproject_medical/data_service/src/bidding/purchasing.go

@@ -38,6 +38,10 @@ func confrimTargetMedicalClass(name string) string {
 //计算相似度得分
 func calculateSimilarityScore(indexDocs map[int][]string, itemArr []string) string {
 	scoreDocs := map[int]float64{}
+	//临时记录~
+	//scoreDocs_1 := map[int]float64{}
+	//scoreDocs_2 := map[int]float64{}
+
 	itemName := strings.Join(itemArr, "")
 	for k, v := range indexDocs {
 		v_str := strings.Join(v, "")
@@ -49,6 +53,11 @@ func calculateSimilarityScore(indexDocs map[int][]string, itemArr []string) stri
 		finally_score := (base_score + dice_score) / 2
 		if finally_score >= 0.55 && dice_score > 0.0 {
 			scoreDocs[k] = qu.FloatFormat(finally_score, 2)
+
+			//临时记录一下分数
+			//scoreDocs_1[k] = qu.FloatFormat(base_score, 2)
+			//scoreDocs_2[k] = qu.FloatFormat(dice_score, 2)
+
 		}
 	}
 	if len(scoreDocs) == 0 {
@@ -58,6 +67,22 @@ func calculateSimilarityScore(indexDocs map[int][]string, itemArr []string) stri
 	index, _ := getMaxScore(scoreDocs)
 	match_str := strings.Join(ul.NgrmDocIndex[index], "")
 	med_code := ul.ProductDocText[match_str]
+
+	//测试使用~存数据
+	//catalog := ul.CodeCatalog[med_code]
+	//ul.Mgo.Save("zzzzzz_query", map[string]interface{}{
+	//	"name":       strings.Join(itemArr, ""),
+	//	"match_name": match_str,
+	//	"score":      score,
+	//	"score_1":    scoreDocs_1[index],
+	//	"score_2":    scoreDocs_2[index],
+	//	"code":       med_code,
+	//	"class_1":    catalog["class_1"],
+	//	"class_2":    catalog["class_2"],
+	//	"class_3":    catalog["class_3"],
+	//	"class_4":    catalog["class_4"],
+	//})
+
 	return med_code
 }
 

+ 10 - 25
fieldproject_medical/data_service/src/export/export.go

@@ -1,46 +1,31 @@
 package export
 
 import (
-	"bidding"
 	"class"
 	log "github.com/donnie4w/go-logger/logger"
-	qu "qfw/util"
 	ul "util"
 )
 
-//导出测试数据
-func ExportBiddingTestData() {
+// UpdateRepairExportTestData walks every document of coll_name in _id order and deletes those whose id is found as "infoid" in ul.V_Bid_Fieldtags via ul.MysqlGlobalTool.FindOne.
+func UpdateRepairExportTestData(coll_name string) {
 	sess := ul.Mgo.GetMgoConn()
 	defer ul.Mgo.DestoryMongoConn(sess)
-	// temporary: take test data starting from Aug 1
-	q := map[string]interface{}{
-		"_id": map[string]interface{}{
-			"$gte": class.StringTOBsonId("62e6a7000000000000000000"),
-		},
-	}
+	q := map[string]interface{}{} // empty filter: every document of the collection is visited
 	total, isok := 0, 0
-	it := sess.DB(ul.Mgo.DbName).C("bidding").Find(&q).Sort("_id").Iter()
+	it := sess.DB(ul.Mgo.DbName).C(coll_name).Find(&q).Sort("_id").Iter()
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
 		if total%10000 == 0 { // progress log every 10000 documents
 			log.Debug("cur index ", total, "~", isok)
 		}
-		if isok > 5000 {
-			break
-		}
-		if qu.IntAll(tmp["extracttype"]) != 1 {
-			tmp = make(map[string]interface{})
-			continue
-		}
-		// is this medical-industry data?
-		p_list := bidding.IsMarkInterfaceMap(tmp["purchasinglist"])
-		sub_list := bidding.IsMarkInterfaceArr(tmp["subscopeclass"])
-		_, b := bidding.IsIndustryInfo(sub_list)
-		if b && len(p_list) > 0 {
+		tmpid := class.BsonTOStringId(tmp["_id"])
+		s := ul.MysqlGlobalTool.FindOne(ul.V_Bid_Fieldtags, map[string]interface{}{
+			"infoid": tmpid,
+		}, "infoid", "")
+		if s != nil { // a row for this record already exists
 			isok++
-			ul.Mgo.Save("zzzzzzzzz", tmp)
+			ul.Mgo.DeleteById(coll_name, tmpid) // NOTE(review): deletes from the collection being iterated — confirm this is safe with the driver's cursor
 		}
 		tmp = make(map[string]interface{}) // fresh map for the next it.Next call
 	}
-
 	log.Debug("is over ", total, "~", isok)
 }

+ 9 - 8
fieldproject_medical/data_service/src/main.go

@@ -10,20 +10,21 @@ import (
 )
 
 func init() { // init calls ul.InitClass and then service.InitService.
+	//ul.IsLocal = true
 	ul.InitClass()
-	return // removed by this commit: this early return made the InitService call below unreachable
 	service.InitService()
 }
 
 func main() { // main serves the default HTTP mux on port 9991.
-	log.Debug("run main ... ...")
-	gtid := "62e6a7000000000000000000"
-	lteid := "62e7f8800000000000000000"
-	bidding.RunPurchasingInfo(gtid, lteid)
-	time.Sleep(999 * time.Hour)
-	return
-
 	// test the similarity computation
 	http.ListenAndServe(":9991", nil) // blocks while serving; the returned error is discarded
 	time.Sleep(999 * time.Hour) // reached only if ListenAndServe returns
 }
+
+func mainT() { // mainT runs bidding.RunPurchasingInfo once over a hard-coded id range; no caller is visible in this diff.
+	gtid := "116cf96744ff2888b4a0c71b" // passed as the gtid argument
+	lteid := "930e34000000000000000000" // passed as the lteid argument
+	log.Debug("run main ... ...", gtid, "~", lteid)
+	bidding.RunPurchasingInfo(gtid, lteid)
+	time.Sleep(999 * time.Hour) // sleeps for 999 hours once the call above returns
+}

+ 37 - 22
fieldproject_medical/data_service/src/mark

@@ -1,23 +1,38 @@
 for k, v := range Medical_Class_Code {
-		key := ""
-		if utf8.RuneCountInString(k) == 2 {
-			key = v + "_" + "_"
-		} else if utf8.RuneCountInString(k) == 4 {
-			code_1 := k[:2]
-			name_1 := Medical_Class_Code[code_1]
-			key = name_1 + "_" + v + "_"
-		} else if utf8.RuneCountInString(k) == 7 {
-			code_1 := k[:2]
-			name_1 := Medical_Class_Code[code_1]
-			code_2 := k[:4]
-			name_2 := Medical_Class_Code[code_2]
-			key = name_1 + "_" + name_2 + "_" + v
-		} else if utf8.RuneCountInString(k) == 11 {
-			key = "四级" + "_" + v
-		}
-		if key != "" {
-			if Medical_Class_Name[key] == "" {
-				Medical_Class_Name[key] = k
-			}
-		}
-	}
+	key := ""
+    if utf8.RuneCountInString(k) == 2 {
+    	key = v + "_" + "_"
+    } else if utf8.RuneCountInString(k) == 4 {
+    	code_1 := k[:2]
+    	name_1 := Medical_Class_Code[code_1]
+    	key = name_1 + "_" + v + "_"
+    } else if utf8.RuneCountInString(k) == 7 {
+  	    code_1 := k[:2]
+   		name_1 := Medical_Class_Code[code_1]
+   		code_2 := k[:4]
+   		name_2 := Medical_Class_Code[code_2]
+    	key = name_1 + "_" + name_2 + "_" + v
+    } else if utf8.RuneCountInString(k) == 11 {
+    	key = "四级" + "_" + v
+    }
+    if key != "" {
+    	if Medical_Class_Name[key] == "" {
+    		Medical_Class_Name[key] = k
+    	}
+    }
+}
+
+
+/*  3000万一段
+    130e34000000000000000000
+    5dd35291a5cb26b9b787710b
+
+    5dd35291a5cb26b9b787710b
+    5ff7d667f0f9d716c18babff
+
+    5ff7d667f0f9d716c18babff
+    616cf96744ff2888b4a0c71b
+
+    616cf96744ff2888b4a0c71b
+    630e34000000000000000000
+*/

+ 2 - 2
fieldproject_medical/data_service/src/service/service.go

@@ -11,11 +11,11 @@ var datalock sync.Mutex
 
 func InitService() {
 
-	http.HandleFunc("/getbid/tags", func(w http.ResponseWriter, r *http.Request) {
+	http.HandleFunc("/getbid/fieldtags", func(w http.ResponseWriter, r *http.Request) {
 		gtid := r.FormValue("gtid")
 		lteid := r.FormValue("lteid")
 		bidding.RunPurchasingInfo(gtid, lteid)
-		res, _ := json.Marshal("xxx ok")
+		res, _ := json.Marshal("ok")
 		w.Write(res)
 	})
 

+ 11 - 10
fieldproject_medical/data_service/src/util/initcfg.go

@@ -16,7 +16,8 @@ const (
 	V_Code_Productclass  = "code_productclass"
 	V_Product_Baseinfo   = "product_baseinfo"
 
-	S_Bidding_Coll = "bidding"
+	//S_Bidding_Coll = "bidding"
+	S_Bidding_Coll = "zktest_mysql_bidding_test"
 )
 
 var (
@@ -47,16 +48,16 @@ func InitClass() {
 //初始化mgo
 func initMgo() {
 	if IsLocal {
+		//Mgo = &MongodbSim{
+		//	MongodbAddr: "192.168.3.207:27001",
+		//	DbName:      "qfw_data",
+		//	Size:        10,
+		//	UserName:    "root",
+		//	Password:    "root",
+		//}
+		//Mgo.InitPool()
 		Mgo = &MongodbSim{
-			MongodbAddr: "192.168.3.207:27001",
-			DbName:      "qfw_data",
-			Size:        10,
-			UserName:    "root",
-			Password:    "root",
-		}
-		Mgo.InitPool()
-		Mgo = &MongodbSim{
-			MongodbAddr: "127.0.0.1:27017",
+			MongodbAddr: "192.168.3.207:27092",
 			DbName:      "zhengkun",
 			Size:        10,
 			UserName:    "",