浏览代码

医院补充~企业id判重标记~最新模式

zhengkun 2 年之前
父节点
当前提交
a7de47bb73

+ 12 - 4
fieldproject_medical/data_preparation/src/hospital/hospital.go

@@ -11,21 +11,29 @@ var (
 	save_hospital_coll    = "zktest_hospital_info"
 	merge_hospital_coll_1 = "zktest_hospital_info_merge"
 	merge_hospital_coll_2 = "zktest_hospital_info_merge_new"
+	merge_hospital_coll_3 = "zktest_hospital_info_merge_over"
+
 	history_hospital_coll = "f_hospital_codes"
 )
 
 // RunHospital runs the hospital data-preparation steps in order: build,
 // clean, then three rounds of "reset duplicate flags, then merge" that move
 // the data through save_hospital_coll -> merge_hospital_coll_1 ->
 // merge_hospital_coll_2 -> merge_hospital_coll_3, with a repair step after
 // the first merge. The first two rounds flag duplicates by "name", the last
 // round by "company_id".
 func RunHospital() {
 	RunBuildHospitalInfo()
 	RunCleanHospitalInfo()
-	RunResetHospitalInfo(save_hospital_coll)
+	RunResetHospitalInfo(save_hospital_coll, "name")
 	// Add an index on repeat_id before the first merge.
 	createMgoIndex(save_hospital_coll, []string{"repeat_id"})
 	RunMergeHospitalInfo(save_hospital_coll, merge_hospital_coll_1)
 	// Add indexes on repeat_id and name for the merged collection.
 	createMgoIndex(merge_hospital_coll_1, []string{"repeat_id", "name"})
 	RunRepairHospitalInfo()
-	RunResetHospitalInfo(merge_hospital_coll_1)
+	RunResetHospitalInfo(merge_hospital_coll_1, "name")
 	RunMergeHospitalInfo(merge_hospital_coll_1, merge_hospital_coll_2)
+	// Reset the duplicate flags keyed on the final company_id.
+	RunResetHospitalInfo(merge_hospital_coll_2, "company_id")
+	// Add an index on company_id.
+	createMgoIndex(merge_hospital_coll_2, []string{"company_id"})
+	RunMergeHospitalInfo(merge_hospital_coll_2, merge_hospital_coll_3)
+
 }
 
 //开始执行医院数据
@@ -45,8 +53,8 @@ func RunCleanHospitalInfo() {
 	time.Sleep(10 * time.Second)
 }
 
 // RunResetHospitalInfo recomputes the duplicate flags of source_coll by
 // calling resetRepeatHospital, then sleeps for 10 seconds. In the new
 // version, key names the document field used as the duplicate key (callers
 // pass "name" or "company_id"); the old version took only the collection.
 // NOTE(review): the reason for the fixed 10s pause is not visible here —
 // presumably it lets pending writes settle; confirm.
-func RunResetHospitalInfo(source_coll string) {
-	resetRepeatHospital(source_coll)
+func RunResetHospitalInfo(source_coll string, key string) {
+	resetRepeatHospital(source_coll, key)
 	time.Sleep(10 * time.Second)
 }
 

+ 1 - 0
fieldproject_medical/data_preparation/src/hospital/hospital_repair.go

@@ -68,6 +68,7 @@ func repairHospital() {
 
 //补充自生id未校验数据
 func updateMarkIdHospital() {
+	log.Debug("开始更新机构~自生id~~ ")
 	sess := class.Save_Mgo.GetMgoConn()
 	defer class.Save_Mgo.DestoryMongoConn(sess)
 	q := map[string]interface{}{}

+ 11 - 6
fieldproject_medical/data_preparation/src/hospital/hospital_reset.go

@@ -7,25 +7,29 @@ import (
 )
 
 //重置重复标记~
-func resetRepeatHospital(coll_name string) {
+func resetRepeatHospital(coll_name string, repeat_name string) {
 	log.Debug("开始重置~~重复标记~~")
 	data_hospitals = map[string]string{}
 	sess := class.Save_Mgo.GetMgoConn()
 	defer class.Save_Mgo.DestoryMongoConn(sess)
 	q := map[string]interface{}{}
 	it := sess.DB(class.Save_Mgo.DbName).C(coll_name).Find(&q).Sort("_id").Select(map[string]interface{}{
-		"name": 1,
+		"name":       1,
+		"company_id": 1,
 	}).Iter()
 	total, isok := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
 		if total%5000 == 0 {
 			log.Debug("cur index ", total, "~", isok)
 		}
-		name := qu.ObjToString(tmp["name"])
+		key := qu.ObjToString(tmp[repeat_name])
 		tmpid := class.BsonTOStringId(tmp["_id"])
+		if key == "" {
+			log.Debug("异常~", tmpid, "~", key)
+		}
 		update := map[string]interface{}{}
-		if data_hospitals[name] == "" {
-			data_hospitals[name] = tmpid
+		if data_hospitals[key] == "" {
+			data_hospitals[key] = tmpid
 			update = map[string]interface{}{
 				"repeat":    0,
 				"repeat_id": "",
@@ -34,9 +38,10 @@ func resetRepeatHospital(coll_name string) {
 			isok++
 			update = map[string]interface{}{
 				"repeat":    1,
-				"repeat_id": data_hospitals[name],
+				"repeat_id": data_hospitals[key],
 			}
 		}
+
 		class.Save_Mgo.UpdateById(coll_name, tmpid, map[string]interface{}{
 			"$set": update,
 		})

+ 1 - 1
fieldproject_medical/data_preparation/src/hospital/hospital_sql.go

@@ -19,7 +19,7 @@ func ExportHospitalInfoToMysql() {
 	defer class.Save_Mgo.DestoryMongoConn(sess)
 	q := map[string]interface{}{}
 	it := sess.DB(class.Save_Mgo.DbName).C(merge_hospital_coll_2).Find(&q).Sort("_id").Iter()
-	pool := make(chan bool, 1)
+	pool := make(chan bool, 3)
 	wg := &sync.WaitGroup{}
 	total := 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {

+ 2 - 3
fieldproject_medical/data_preparation/src/main.go

@@ -4,7 +4,6 @@ import (
 	"class"
 	"flag"
 	log "github.com/donnie4w/go-logger/logger"
-	"hospital"
 	"net/http"
 	"time"
 )
@@ -16,9 +15,9 @@ func init() {
 func main() {
 	log.Debug("run main ... ")
 	//处理医院
-	hospital.RunHospital()
+	//hospital.RunHospital()
 	//导入信息~医疗关联sql表
-	hospital.ExportHospitalInfoToMysql()
+	//hospital.ExportHospitalInfoToMysql()
 
 	//代码表构建
 	//vcode.RunVCodeData()

+ 52 - 47
fieldproject_medical/data_service/src/bidding/bidding.go

@@ -5,16 +5,19 @@ import (
 	log "github.com/donnie4w/go-logger/logger"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 	qu "qfw/util"
+	"regexp"
 	"strings"
 	"sync"
 	"unicode/utf8"
 	ul "util"
 )
 
 // fields is the field projection passed to Select when RunPurchasingInfo
 // iterates the bidding collection. The new version drops "subscopeclass" and
 // adds "s_topscopeclass", "s_subscopeclass", "buyerclass" and "buyer".
-var fields = map[string]interface{}{"toptype": 1, "subtype": 1, "subscopeclass": 1, "extracttype": 1, "purchasinglist": 1}
-
+var fields = map[string]interface{}{"toptype": 1, "subtype": 1, "s_topscopeclass": 1, "s_subscopeclass": 1, "buyerclass": 1, "buyer": 1, "extracttype": 1, "purchasinglist": 1}
 var datalock, numlock sync.Mutex
 
 // A_FieldReg matches any string containing "医疗卫生_设备" or "医疗卫生_耗材"
 // (unanchored); IsIndustryInfo applies it to the sub-class string.
 // B_FieldReg matches only a string that is exactly "医疗" or "卫健委"
 // (anchored with ^ and $); IsIndustryInfo applies it to the buyer class.
+var A_FieldReg *regexp.Regexp = regexp.MustCompile("(医疗卫生_设备|医疗卫生_耗材)")
+var B_FieldReg *regexp.Regexp = regexp.MustCompile("^(医疗|卫健委)$")
+
 func RunPurchasingInfo(gtid string, lteid string) {
 	log.Debug("开始处理标讯信息~~~", gtid, "~", lteid)
 	sess := ul.Mgo.GetMgoConn()
@@ -26,11 +29,11 @@ func RunPurchasingInfo(gtid string, lteid string) {
 		},
 	}
 	it := sess.DB(ul.Mgo.DbName).C(ul.S_Bidding_Coll).Find(&q).Sort("_id").Select(fields).Iter()
-	pool := make(chan bool, 8)
+	pool := make(chan bool, 6)
 	wg := &sync.WaitGroup{}
 	total, isok := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%1000 == 0 {
+		if total%10000 == 0 {
 			log.Debug("cur index ", total, "~", isok)
 		}
 		if qu.IntAll(tmp["extracttype"]) != 1 {
@@ -67,11 +70,16 @@ func RunPurchasingInfo(gtid string, lteid string) {
 //构建标的物基本信息
 func createBaseInfo(tmp map[string]interface{}) bool {
 	p_list := IsMarkInterfaceMap(tmp["purchasinglist"])
-	sub_list := IsMarkInterfaceArr(tmp["subscopeclass"])
 	infoid := class.BsonTOStringId(tmp["_id"])
+	s_topscopeclass := qu.ObjToString(tmp["s_topscopeclass"])
+	s_subscopeclass := qu.ObjToString(tmp["s_subscopeclass"])
+	buyerclass := qu.ObjToString(tmp["buyerclass"])
+	buyer := qu.ObjToString(tmp["buyer"])
 	bid_topsubtype_code := confrimTopSubCode(qu.ObjToString(tmp["toptype"]), qu.ObjToString(tmp["subtype"]))
-	b, industry_code, new_plist := checkWhetherValidInfo(p_list, sub_list)
-	if b {
+	//是否符合领域范围
+	industry_code, isField := IsIndustryInfo(s_topscopeclass, s_subscopeclass, buyerclass, buyer)
+	if isField {
+		new_plist := createNewPurchasingInfo(p_list)
 		//标的物基本信息~记录标签
 		insertBaseInfo(new_plist, infoid, bid_topsubtype_code)
 		//招标信息领域标签
@@ -79,37 +87,23 @@ func createBaseInfo(tmp map[string]interface{}) bool {
 		//招标信息行业标签
 		insertIndustryTag(industry_code, infoid)
 	}
-	return b
-}
-
-//return 是否有效~行业代码~新标的物信息
-func checkWhetherValidInfo(p_list []map[string]interface{}, sub_list []string) (bool, string, []map[string]interface{}) {
-	is_exists := false
-	industry_code, b := IsIndustryInfo(sub_list)
-	new_plist := []map[string]interface{}{}
-	if len(p_list) > 0 && b {
-		is_exists, new_plist = createNewPurchasingInfo(p_list)
-	}
-	return is_exists, industry_code, new_plist
+	return isField
 }
 
 //返回新的~标的物信息(分类+整合)
-func createNewPurchasingInfo(p_list []map[string]interface{}) (bool, []map[string]interface{}) {
-	is_exists := false
+func createNewPurchasingInfo(p_list []map[string]interface{}) []map[string]interface{} {
 	new_plist := []map[string]interface{}{}
 	for _, v := range p_list {
 		//去重处理~名称~品牌~型号~暂无重复
 		data := map[string]interface{}{}
 		itemname := qu.ObjToString(v["itemname"])
-		if itemname == "" {
+		if itemname == "" || utf8.RuneCountInString(itemname) > 30 {
 			continue
 		}
 		//根据标的物名字~打上具体的分类数据~
-		is_yl, yl_code := confrimYlClassCode(itemname)
+		//重要重要重要
+		yl_code := confrimTargetMedicalClass(itemname)
 
-		if !is_exists && is_yl {
-			is_exists = true //证明此条信息有效果
-		}
 		data["medical_equipment_code"] = yl_code
 		data["itemname"] = itemname
 		data["brand"] = qu.ObjToString(v["brandname"])
@@ -119,7 +113,6 @@ func createNewPurchasingInfo(p_list []map[string]interface{}) (bool, []map[strin
 		}
 		data["model"] = model
 		data["specs"] = qu.ObjToString(v["specs"])
-
 		if v["unitname"] != nil {
 			data["unit"] = qu.ObjToString(v["unitname"])
 		}
@@ -141,37 +134,49 @@ func createNewPurchasingInfo(p_list []map[string]interface{}) (bool, []map[strin
 		}
 		new_plist = append(new_plist, data)
 	}
-	return is_exists, new_plist
+	return new_plist
 }
 
 // IsIndustryInfo reports whether a record counts as medical-industry data
 // and returns its industry code string.
 //
 // New version: both condition groups must hold, otherwise it returns
 // ("", false).
 //   - Group A: sub_class matches A_FieldReg, or sub_class is empty and
 //     top_class contains "医疗卫生".
 //   - Group B: buyer_class matches B_FieldReg, or buyer_class is "制造业" and
 //     buyer contains "医疗器械".
 //
 // When both hold, the returned code is a comma-separated list built from
 // ul.Bid_Industry: one entry per comma-separated element of top_class
 // (looked up under the "code" key), then one per "a_b" element of sub_class.
 //
 // Old version (removed lines): took the sub-class list and returned the code
 // of the first "医疗卫生_设备" / "医疗卫生_耗材" entry.
 //
 // NOTE(review): a lookup that yields "" is still appended, so the result can
 // contain empty segments such as "x,,y" or a trailing comma — assuming
 // Bid_Industry is a nested string map; confirm whether callers tolerate this.
-func IsIndustryInfo(sub_list []string) (string, bool) {
+func IsIndustryInfo(top_class string, sub_class string, buyer_class string, buyer string) (string, bool) {
 	code := ""
-	b := false
-	for _, v := range sub_list {
+	// Two groups of conditions decide whether this is medical-field data.
+	is_A, is_B := false, false
+	if A_FieldReg.MatchString(sub_class) || (sub_class == "" && strings.Contains(top_class, "医疗卫生")) {
+		is_A = true
+	}
+	if !is_A {
+		return "", false
+	}
+	if B_FieldReg.MatchString(buyer_class) || (buyer_class == "制造业" && strings.Contains(buyer, "医疗器械")) {
+		is_B = true
+	}
+	if !is_B {
+		return "", false
+	}
+	// Build the comma-separated code string.
+	top_arr := strings.Split(top_class, ",")
+	for _, v := range top_arr {
+		vc := ul.Bid_Industry[v]["code"]
+		if code != "" {
+			code += ","
+		}
+		code += vc
+	}
+	sub_arr := strings.Split(sub_class, ",")
+	for _, v := range sub_arr {
 		arr := strings.Split(v, "_")
 		if len(arr) == 2 {
 			industry_1 := arr[0]
 			industry_2 := arr[1]
-			if industry_1 == "医疗卫生" && (industry_2 == "设备" || industry_2 == "耗材") {
-				b = true
-				code = ul.Bid_Industry[industry_1][industry_2]
-				break
+			vc := ul.Bid_Industry[industry_1][industry_2]
+			if code != "" {
+				code += ","
 			}
+			code += vc
 		}
 	}
-	return code, b
-}
-
-// Assign a medical classification code from the item name.
-func confrimYlClassCode(name string) (bool, string) {
-	is_b := false
-	yl_code := ""
-	yl_code = ConfrimTargetMedicalClass(name)
-	if utf8.RuneCountInString(yl_code) > 0 {
-		is_b = true
-	}
-	return is_b, yl_code
+	return code, true
 }
 
 //根据标的物名称~打医疗分类

+ 3 - 3
fieldproject_medical/data_service/src/bidding/purchasing.go

@@ -9,11 +9,11 @@ import (
 )
 
 //计算相似度分类
-func ConfrimTargetMedicalClass(name string) string {
+func confrimTargetMedicalClass(name string) string {
 	//清洗~名称
 	name = cleanItemName(name)
-	if utf8.RuneCountInString(name) <= 2 {
-		return "" //过短数据不进行分类
+	if utf8.RuneCountInString(name) <= 2 || utf8.RuneCountInString(name) >= 30 {
+		return "" //过短数据不进行分类
 	}
 	//完全匹配校验
 	b, med_code := completeMatching(name)

+ 4 - 1
fieldproject_medical/data_service/src/main.go

@@ -11,12 +11,15 @@ import (
 
 // init runs ul.InitClass at package load time.
 func init() {
 	ul.InitClass()
 	// NOTE(review): the unconditional return added below makes
 	// service.InitService() unreachable, so the service is never initialised.
 	// This looks like a local-debugging leftover — confirm it is intended.
+	return
 	service.InitService()
 }
 
 func main() {
 	log.Debug("run main ... ...")
-	bidding.RunPurchasingInfo("62e6a7000000000000000000", "62e7f8800000000000000000")
+	gtid := "62e6a7000000000000000000"
+	lteid := "62e7f8800000000000000000"
+	bidding.RunPurchasingInfo(gtid, lteid)
 	time.Sleep(999 * time.Hour)
 	return
 

+ 8 - 0
fieldproject_medical/data_service/src/util/initcfg.go

@@ -55,6 +55,14 @@ func initMgo() {
 			Password:    "root",
 		}
 		Mgo.InitPool()
+		Mgo = &MongodbSim{
+			MongodbAddr: "127.0.0.1:27017",
+			DbName:      "zhengkun",
+			Size:        10,
+			UserName:    "",
+			Password:    "",
+		}
+		Mgo.InitPool()
 	} else {
 		Mgo = &MongodbSim{
 			MongodbAddr: "172.17.145.163:27083,172.17.4.187:27082",