瀏覽代碼

历史名录~校验

zhengkun 3 年之前
父節點
當前提交
633b6880a5

+ 10 - 10
fieldproject_medical/data_preparation/src/class/initdata.go

@@ -65,17 +65,8 @@ func InitClass() {
 //初始化mgo
 func initMgo() {
 	if isLocal {
-		//Save_Mgo = &MongodbSim{
-		//	MongodbAddr: "127.0.0.1:27017",
-		//	DbName:      "zhengkun",
-		//	Size:        10,
-		//	UserName:    "",
-		//	Password:    "",
-		//}
-		//Save_Mgo.InitPool()
-
 		Save_Mgo = &MongodbSim{
-			MongodbAddr: "192.168.3.207:27092",
+			MongodbAddr: "127.0.0.1:27017",
 			DbName:      "zhengkun",
 			Size:        10,
 			UserName:    "",
@@ -83,6 +74,15 @@ func initMgo() {
 		}
 		Save_Mgo.InitPool()
 
+		//Save_Mgo = &MongodbSim{
+		//	MongodbAddr: "192.168.3.207:27092",
+		//	DbName:      "zhengkun",
+		//	Size:        10,
+		//	UserName:    "",
+		//	Password:    "",
+		//}
+		//Save_Mgo.InitPool()
+
 		Spi_Mgo = &MongodbSim{
 			MongodbAddr: "127.0.0.1:27017",
 			DbName:      "zhengkun",

+ 37 - 4
fieldproject_medical/data_preparation/src/hospital/hospital.go

@@ -5,9 +5,31 @@ import (
 	"time"
 )
 
+var (
+	save_hospital_coll    = "zktest_hospital_info"           // raw per-site hospital records; written by dealWithHospitalBaseInfo
+	merge_hospital_coll_1 = "zktest_hospital_info_merge"     // output of the first merge (RunHospital); repaired in place by RunHospitalOnline
+	merge_hospital_coll_2 = "zktest_hospital_info_merge_new" // output of the second merge (RunHospitalOnline); read by ExportHospitalInfoToMysql
+	history_hospital_coll = "f_hospital_codes"               // history directory queried through class.Spi_Mgo in compareHospital
+
+	//merge_coll_second = "zktest_hospital_info_merge_new"
+	//history_coll      = "f_hospital_codes"
+)
+// RunHospital runs the first pass: build and clean save_hospital_coll, recompute its repeat markers, then merge it into merge_hospital_coll_1.
+func RunHospital() {
+	RunBuildHospitalInfo()
+	RunCleanHospitalInfo()
+	RunResetHospitalInfo(save_hospital_coll)
+	RunMergeHospitalInfo(save_hospital_coll, merge_hospital_coll_1)
+}
+func RunHospitalOnline() { // second pass over merge_hospital_coll_1: repair and history check, recompute repeat markers, merge into merge_hospital_coll_2
+	RunRepairHospitalInfo()
+	RunResetHospitalInfo(merge_hospital_coll_1)
+	RunMergeHospitalInfo(merge_hospital_coll_1, merge_hospital_coll_2)
+	// All done ~ open question: should the intermediate link tables be deleted afterwards?
+}
+
 //开始执行医院数据
 func RunBuildHospitalInfo() {
-	//整合整体~医院信息
 	log.Debug("开始整合医院信息~~~")
 	dealWithHospitalBaseInfo("f_hospital_39jk", "hospital_39jk_ain_depart", "39健康")
 	dealWithHospitalBaseInfo("f_hospital_hdf", "hospital_hdf_ain_depart", "好大夫")
@@ -21,12 +43,23 @@ func RunCleanHospitalInfo() {
 	//清洗~医院信息~名称
 	cleanHospitalInfoData()
 	time.Sleep(10 * time.Second)
-	resetRepeatHospital()
+}
+// RunResetHospitalInfo recomputes the repeat/repeat_id markers of source_coll via resetRepeatHospital.
+func RunResetHospitalInfo(source_coll string) {
+	resetRepeatHospital(source_coll)
 	time.Sleep(10 * time.Second) // NOTE(review): fixed 10s pause, presumably to let the writes settle before the next stage — confirm
 }
 
 // RunMergeHospitalInfo merges the hospital records left after duplicate detection.
-func RunMergeHospitalInfo() {
-	mergeRepeatHospital()
+func RunMergeHospitalInfo(source_coll string, output_coll string) {
+	mergeRepeatHospital(source_coll, output_coll) // reads source_coll, saves the merged records into output_coll
+	time.Sleep(10 * time.Second) // NOTE(review): fixed 10s pause, presumably to let the writes settle — confirm
+}
+
+// RunRepairHospitalInfo repairs the merged records (repairHospital) and then verifies them against historical aliases (compareHospital), pausing 10 seconds after each step.
+func RunRepairHospitalInfo() {
+	repairHospital()
+	time.Sleep(10 * time.Second)
+	compareHospital()
 	time.Sleep(10 * time.Second)
 }

+ 2 - 2
fieldproject_medical/data_preparation/src/hospital/hospital_clean.go

@@ -23,7 +23,7 @@ func cleanHospitalInfoData() {
 	sess := class.Save_Mgo.GetMgoConn()
 	defer class.Save_Mgo.DestoryMongoConn(sess)
 	q := map[string]interface{}{}
-	it := sess.DB(class.Save_Mgo.DbName).C(save_coll).Find(&q).Sort("_id").Iter()
+	it := sess.DB(class.Save_Mgo.DbName).C(save_hospital_coll).Find(&q).Sort("_id").Iter()
 	total := 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
 		if total%5000 == 0 {
@@ -42,7 +42,7 @@ func cleanHospitalInfoData() {
 		cleanHospitalType(tmp, &update)
 
 		if len(update) > 0 && tmpid != "" {
-			class.Save_Mgo.UpdateById(save_coll, tmpid, map[string]interface{}{
+			class.Save_Mgo.UpdateById(save_hospital_coll, tmpid, map[string]interface{}{
 				"$set": update,
 			})
 		}

+ 160 - 0
fieldproject_medical/data_preparation/src/hospital/hospital_extra.go

@@ -0,0 +1,160 @@
+package hospital
+
+import (
+	"class"
+	log "github.com/donnie4w/go-logger/logger"
+	"github.com/uuid"
+	qu "qfw/util"
+	"strings"
+	"unicode/utf8"
+)
+
+// compareHospital supplements merge_hospital_coll_1 from the collected history directory: for each record with mark_id 0 it looks the name up in history_hospital_coll and, on an alias match, rewrites name/alias and assigns company_id/mark_id.
+func compareHospital() {
+	log.Debug("开始修复~历史名录~数据~~~")
+	sess := class.Save_Mgo.GetMgoConn()
+	defer class.Save_Mgo.DestoryMongoConn(sess)
+	q := map[string]interface{}{ // only records whose mark_id is still 0
+		"mark_id": 0,
+	}
+	it := sess.DB(class.Save_Mgo.DbName).C(merge_hospital_coll_1).Find(&q).Sort("_id").Iter()
+	total, isok := 0, 0 // total: records scanned; isok: records updated from the history directory
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%1000 == 0 {
+			log.Debug("cur index ", total, isok)
+		}
+		name := qu.ObjToString(tmp["name"])
+		alias := qu.ObjToString(tmp["alias"])
+		if name != "" {
+			dataArr, _ := class.Spi_Mgo.Find(history_hospital_coll, map[string]interface{}{ // NOTE(review): the second return value is discarded — presumably an error/ok flag; confirm
+				"search_name": name,
+			}, nil, map[string]interface{}{
+				"hospital_name": 1,
+				"alias_name":    1,
+			})
+			if len(dataArr) > 0 {
+				b, finally_name, finally_alias := compareHospitalNameData(name, dataArr, alias)
+				if b {
+					isok++
+					// Update the current record according to the returned match.
+					info_id := class.BsonTOStringId(tmp["_id"])
+					info_update := map[string]interface{}{}
+					info_update["name"] = finally_name
+					info_update["alias"] = finally_alias
+
+					cur_data := class.Save_Mgo.FindOne(merge_hospital_coll_1, map[string]interface{}{ // is there already a record stored under the matched name?
+						"name": finally_name,
+					})
+					if len(cur_data) > 2 && cur_data != nil { // identical data was found, so two updates are needed (NOTE(review): the nil test looks redundant once len > 2 holds)
+						company_id := qu.ObjToString(cur_data["company_id"])
+						mark_id := qu.IntAll(cur_data["mark_id"])
+						source_id := class.BsonTOStringId(cur_data["_id"])
+						if company_id == "" { // the found record has no id yet: generate one and give it to both records
+							new_company_id := uuid.New().String()
+							new_company_id = strings.ReplaceAll(new_company_id, "-", "")
+							source_update := map[string]interface{}{
+								"company_id": new_company_id,
+								"mark_id":    2,
+							}
+							if source_id != info_id { // when the found record is the current one, the single update below is enough
+								class.Save_Mgo.UpdateById(merge_hospital_coll_1, source_id, map[string]interface{}{
+									"$set": source_update,
+								})
+							}
+							info_update["company_id"] = new_company_id
+							info_update["mark_id"] = 2
+						} else { // reuse the id and mark already stored on the found record
+							info_update["company_id"] = company_id
+							info_update["mark_id"] = mark_id
+						}
+					} else { // otherwise try the Ping'an (凭安) base for an existing id ~ fall back to a self-generated id
+						new_company_id := inquirBaseInfoid(finally_name)
+						if new_company_id != "" {
+							info_update["company_id"] = new_company_id
+							info_update["mark_id"] = 1 // id came from inquirBaseInfoid
+						} else {
+							c_id := uuid.New().String()
+							c_id = strings.ReplaceAll(c_id, "-", "")
+							info_update["company_id"] = c_id
+							info_update["mark_id"] = 2 // id was generated locally
+						}
+					}
+					class.Save_Mgo.UpdateById(merge_hospital_coll_1, info_id, map[string]interface{}{
+						"$set": info_update,
+					})
+					if info_update == nil { // NOTE(review): info_update is assigned a non-nil map literal above, so this branch looks unreachable
+						log.Debug("异常~~~")
+					}
+				}
+			}
+		}
+
+		tmp = make(map[string]interface{}) // fresh map for the next it.Next call — presumably so keys of this record do not carry over; confirm
+	}
+
+	log.Debug("is over ", total, isok)
+}
+
+// compareHospitalNameData compares search_name with the history-directory rows in dataArr; for the first row whose comma-separated alias_name contains search_name it returns (true, that row's hospital_name, merged alias string), otherwise (false, "", "").
+func compareHospitalNameData(search_name string, dataArr []map[string]interface{}, source_alias string) (bool, string, string) {
+	for _, v := range dataArr {
+		hospital_name := qu.ObjToString(v["hospital_name"])
+		alias_name := qu.ObjToString(v["alias_name"])
+		if alias_name == "" || hospital_name == "" { // rows missing either field cannot produce a match
+			continue
+		}
+		aliasArr := strings.Split(alias_name, ",")
+		b := compareAliasName(search_name, aliasArr)
+		if b {
+			// Merge the alias information of the record and the history row.
+			new_alias_str := mergeAliasName(search_name, hospital_name, source_alias, alias_name)
+			return true, hospital_name, new_alias_str
+		}
+	}
+	return false, "", ""
+}
+
+// compareAliasName reports whether search_name is exactly equal to one of the entries of aliasArr.
+func compareAliasName(search_name string, aliasArr []string) bool {
+	for _, v := range aliasArr {
+		if v == search_name { // exact string comparison, no trimming or normalisation
+			return true
+		}
+	}
+	return false
+}
+// mergeAliasName returns a comma-joined alias list without duplicates: search_name first (when it differs from hospital_name), then the entries of old_alias, then those of new_alias; entries are stripped of spaces and kept only if longer than 2 runes and different from hospital_name.
+func mergeAliasName(search_name string, hospital_name string, old_alias string, new_alias string) string {
+	alias_name := ""
+	alias_arr := []string{}           // aliases in output order
+	alias_dict := map[string]string{} // aliases already emitted, used as a set
+	old_alias_arr := strings.Split(old_alias, ",")
+	new_alias_arr := strings.Split(new_alias, ",")
+	if hospital_name != search_name { // NOTE(review): search_name is added as-is, without the space stripping and length filter applied below
+		alias_arr = append(alias_arr, search_name)
+		alias_dict[search_name] = search_name
+	}
+
+	for _, v := range old_alias_arr {
+		str := strings.ReplaceAll(v, " ", "")
+		if utf8.RuneCountInString(str) > 2 && str != "" && str != hospital_name { // the str != "" test is already implied by the rune count
+			if alias_dict[str] == "" {
+				alias_arr = append(alias_arr, str)
+				alias_dict[str] = str
+			}
+		}
+	}
+	for _, v := range new_alias_arr { // same filter as the loop above, applied to the history aliases
+		str := strings.ReplaceAll(v, " ", "")
+		if utf8.RuneCountInString(str) > 2 && str != "" && str != hospital_name {
+			if alias_dict[str] == "" {
+				alias_arr = append(alias_arr, str)
+				alias_dict[str] = str
+			}
+		}
+	}
+	if len(alias_arr) > 0 {
+		alias_name = strings.Join(alias_arr, ",")
+	}
+	return alias_name
+}

+ 5 - 6
fieldproject_medical/data_preparation/src/hospital/hospital_info.go

@@ -9,11 +9,6 @@ import (
 	"unicode/utf8"
 )
 
-var (
-	save_coll  = "zktest_hospital_info"
-	merge_coll = "zktest_hospital_info_merge"
-)
-
 //处理医院基本信息~涉及多张表
 func dealWithHospitalBaseInfo(hos_coll string, depart_coll string, sourceweb string) {
 	sess := class.Save_Mgo.GetMgoConn()
@@ -44,7 +39,7 @@ func dealWithHospitalBaseInfo(hos_coll string, depart_coll string, sourceweb str
 				data_info["alias"] = qu.ObjToString(tmp["alias"])
 				//网站来源
 				data_info["sourceweb"] = sourceweb
-				class.Save_Mgo.Save(save_coll, data_info)
+				class.Save_Mgo.Save(save_hospital_coll, data_info)
 			}
 		}(tmp)
 		tmp = make(map[string]interface{})
@@ -90,6 +85,10 @@ func treatHospitalInfo(tmp map[string]interface{}) map[string]interface{} {
 	data["city"] = qu.ObjToString(tmp["city"])
 	data["district"] = qu.ObjToString(tmp["district"])
 
+	data["company_id"] = ""
+	data["mark_id"] = 0
+	data["pcompany_id"] = ""
+
 	return data
 }
 

+ 8 - 7
fieldproject_medical/data_preparation/src/hospital/hospital_merge.go

@@ -13,23 +13,24 @@ var field_info = []string{
 	"level", "type", "business_type",
 	"area", "city", "district",
 	"alias", "departs",
+	"company_id", "mark_id", "pcompany_id",
 }
 
 //最终合并判重后的医院数据
-func mergeRepeatHospital() {
-	log.Debug("开始合并重后的数据~~~")
+func mergeRepeatHospital(source_coll string, output_coll string) {
+	log.Debug("开始合并~判重后的数据~~~")
 	sess := class.Save_Mgo.GetMgoConn()
 	defer class.Save_Mgo.DestoryMongoConn(sess)
 	q := map[string]interface{}{
 		"repeat": 0,
 	}
-	it := sess.DB(class.Save_Mgo.DbName).C(save_coll).Find(&q).Sort("_id").Iter()
+	it := sess.DB(class.Save_Mgo.DbName).C(source_coll).Find(&q).Sort("_id").Iter()
 	total, isok := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
 		if total%1000 == 0 {
 			log.Debug("cur index ", total, "~", isok)
 		}
-		dataArr, _ := class.Save_Mgo.Find(save_coll, map[string]interface{}{
+		dataArr, _ := class.Save_Mgo.Find(source_coll, map[string]interface{}{
 			"repeat_id": class.BsonTOStringId(tmp["_id"]),
 		}, nil, nil)
 		isok = isok + len(dataArr) + 1
@@ -39,7 +40,7 @@ func mergeRepeatHospital() {
 		}
 		if len(new_arr) > 0 {
 			new_data := mergeHospitalInfo(new_arr)
-			class.Save_Mgo.Save(merge_coll, new_data)
+			class.Save_Mgo.Save(output_coll, new_data)
 		} else {
 			log.Debug("重复组异常~", tmp["_id"])
 		}
@@ -61,8 +62,8 @@ func mergeHospitalInfo(dataArr []map[string]interface{}) map[string]interface{}
 	//多组信息合并~
 	name := qu.ObjToString(new_data["name"])
 	for _, k := range field_info {
-		//是否合并~等级 性质  类型
-		if k == "area" || k == "city" || k == "district" {
+		if k == "area" || k == "city" || k == "district" ||
+			k == "company_id" || k == "mark_id" || k == "pcompany_id" {
 			continue
 		}
 		if k == "alias" { //合并别名~

+ 7 - 11
fieldproject_medical/data_preparation/src/hospital/hospital_repair.go

@@ -3,18 +3,17 @@ package hospital
 import (
 	"class"
 	log "github.com/donnie4w/go-logger/logger"
-	"github.com/uuid"
 	qu "qfw/util"
-	"strings"
 	"sync"
 )
 
 //修复 ~ 基本信息 ~ 企业id ~地域等
-func RunRepairHospitalInfo() {
+func repairHospital() {
+	log.Debug("开始修复数据~补充地域~补充企业标识~~~")
 	sess := class.Save_Mgo.GetMgoConn()
 	defer class.Save_Mgo.DestoryMongoConn(sess)
 	q := map[string]interface{}{}
-	it := sess.DB(class.Save_Mgo.DbName).C(merge_coll).Find(&q).Sort("_id").Iter()
+	it := sess.DB(class.Save_Mgo.DbName).C(merge_hospital_coll_1).Find(&q).Sort("_id").Iter()
 	pool := make(chan bool, 5)
 	wg := &sync.WaitGroup{}
 	total := 0
@@ -39,13 +38,10 @@ func RunRepairHospitalInfo() {
 			if city == "" {
 				supplementRegionally(&area, &city, &district, name, address)
 			}
-
-			company_id, mark_id := "", 1
+			company_id, mark_id := "", 0
 			company_id = inquirBaseInfoid(name)
-			if company_id == "" {
-				company_id = uuid.New().String()
-				company_id = strings.ReplaceAll(company_id, "-", "")
-				mark_id = 0
+			if company_id != "" {
+				mark_id = 1
 			}
 			update["area"] = area
 			update["city"] = city
@@ -55,7 +51,7 @@ func RunRepairHospitalInfo() {
 			update["pcompany_id"] = ""
 
 			if tmpid != "" {
-				class.Save_Mgo.UpdateById(merge_coll, tmpid, map[string]interface{}{
+				class.Save_Mgo.UpdateById(merge_hospital_coll_1, tmpid, map[string]interface{}{
 					"$set": update,
 				})
 			}

+ 47 - 0
fieldproject_medical/data_preparation/src/hospital/hospital_reset.go

@@ -0,0 +1,47 @@
+package hospital
+
+import (
+	"class"
+	"github.com/go-xweb/log"
+	qu "qfw/util"
+)
+
+// resetRepeatHospital resets the duplicate markers of coll_name: scanning in _id order, the first record seen for a name gets repeat 0 / repeat_id "", every later record with that name gets repeat 1 and the first record's id.
+func resetRepeatHospital(coll_name string) {
+	log.Debug("开始重置~~重复标记~~") // NOTE(review): this file imports github.com/go-xweb/log while sibling files use github.com/donnie4w/go-logger/logger — confirm this is intended
+	data_hospitals = map[string]string{} // name -> id of the first record seen; the variable is declared outside this file and is cleared for each run
+	sess := class.Save_Mgo.GetMgoConn()
+	defer class.Save_Mgo.DestoryMongoConn(sess)
+	q := map[string]interface{}{}
+	it := sess.DB(class.Save_Mgo.DbName).C(coll_name).Find(&q).Sort("_id").Select(map[string]interface{}{ // only the name field is selected
+		"name": 1,
+	}).Iter()
+	total, isok := 0, 0 // total: records scanned; isok: records marked as repeats
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%5000 == 0 {
+			log.Debug("cur index ", total, "~", isok)
+		}
+		name := qu.ObjToString(tmp["name"])
+		tmpid := class.BsonTOStringId(tmp["_id"])
+		update := map[string]interface{}{}
+		if data_hospitals[name] == "" { // first occurrence of this name (an empty name is grouped like any other key)
+			data_hospitals[name] = tmpid
+			update = map[string]interface{}{
+				"repeat":    0,
+				"repeat_id": "",
+			}
+		} else { // later occurrence: point it at the first record with the same name
+			isok++
+			update = map[string]interface{}{
+				"repeat":    1,
+				"repeat_id": data_hospitals[name],
+			}
+		}
+		class.Save_Mgo.UpdateById(coll_name, tmpid, map[string]interface{}{
+			"$set": update,
+		})
+		tmp = make(map[string]interface{})
+	}
+	log.Debug("重置~判重完毕~over~", total, "~", isok)
+
+}

+ 1 - 1
fieldproject_medical/data_preparation/src/hospital/hospital_sql.go

@@ -18,7 +18,7 @@ func ExportHospitalInfoToMysql() {
 	sess := class.Save_Mgo.GetMgoConn()
 	defer class.Save_Mgo.DestoryMongoConn(sess)
 	q := map[string]interface{}{}
-	it := sess.DB(class.Save_Mgo.DbName).C(merge_coll).Find(&q).Sort("_id").Iter()
+	it := sess.DB(class.Save_Mgo.DbName).C(merge_hospital_coll_2).Find(&q).Sort("_id").Iter()
 	pool := make(chan bool, 1)
 	wg := &sync.WaitGroup{}
 	total, isok := 0, 0

+ 1 - 41
fieldproject_medical/data_preparation/src/hospital/hospital_ways.go

@@ -3,7 +3,6 @@ package hospital
 import (
 	"class"
 	"encoding/json"
-	log "github.com/donnie4w/go-logger/logger"
 	"go.mongodb.org/mongo-driver/bson/primitive"
 	"io/ioutil"
 	"net/http"
@@ -39,6 +38,7 @@ func inquirBaseInfoid(name string) string {
 
 //补充~地域区划
 func supplementRegionally(area *string, city *string, district *string, name string, address string) {
+	return
 	//查询其他表~
 	if *area == "" || *area == "全国" {
 		*area = "全国"
@@ -387,46 +387,6 @@ func standardname(s_name string) (bool, string, string) {
 	return is_clean, new_name, his_name
 }
 
-//重置重复标记~
-func resetRepeatHospital() {
-	log.Debug("重置~~重复标记~~")
-	data_hospitals = map[string]string{}
-	sess := class.Save_Mgo.GetMgoConn()
-	defer class.Save_Mgo.DestoryMongoConn(sess)
-	q := map[string]interface{}{}
-	it := sess.DB(class.Save_Mgo.DbName).C(save_coll).Find(&q).Sort("_id").Select(map[string]interface{}{
-		"name": 1,
-	}).Iter()
-	total, isok := 0, 0
-	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
-		if total%5000 == 0 {
-			log.Debug("cur index ", total, "~", isok)
-		}
-		name := qu.ObjToString(tmp["name"])
-		tmpid := class.BsonTOStringId(tmp["_id"])
-		update := map[string]interface{}{}
-		if data_hospitals[name] == "" {
-			data_hospitals[name] = tmpid
-			update = map[string]interface{}{
-				"repeat":    0,
-				"repeat_id": "",
-			}
-		} else {
-			isok++
-			update = map[string]interface{}{
-				"repeat":    1,
-				"repeat_id": data_hospitals[name],
-			}
-		}
-		class.Save_Mgo.UpdateById(save_coll, tmpid, map[string]interface{}{
-			"$set": update,
-		})
-		tmp = make(map[string]interface{})
-	}
-	log.Debug("重置~判重完毕~over~", total, "~", isok)
-
-}
-
 //构建指定结构
 func convertInterfaceMap(t interface{}) []map[string]interface{} {
 	p_list := []map[string]interface{}{}

+ 3 - 6
fieldproject_medical/data_preparation/src/main.go

@@ -13,13 +13,10 @@ func init() {
 }
 func main() {
 	log.Debug("run main ... ")
-
-	//医院基本信息整合~线下~加载代码表~临时
-	//hospital.RunBuildHospitalInfo()
-	//hospital.RunCleanHospitalInfo()
-	//hospital.RunMergeHospitalInfo()
+	//处理医院
+	//hospital.RunHospital()
 	//暂时需要线上补充~数据
-	//hospital.RunRepairHospitalInfo()
+	//hospital.RunHospitalOnline()
 	//导入信息~医疗关联sql表
 	//hospital.ExportHospitalInfoToMysql()
 

+ 23 - 17
fieldproject_medical/data_service/src/bidding/bidding.go

@@ -11,7 +11,9 @@ import (
 	ul "util"
 )
 
-var fields = map[string]interface{}{"toptype": 1, "subtype": 1, "subscopeclass": 1, "repeat": 1, "yl_purchasinglist": 1}
+var fields = map[string]interface{}{"toptype": 1, "subtype": 1, "subscopeclass": 1, "extracttype": 1, "purchasinglist": 1} // passed to Select in RunPurchasingInfo
+
+var datalock, numlock sync.Mutex // numlock guards the isok counter in RunPurchasingInfo; datalock has no use in the visible hunks
 
 func RunPurchasingInfo(gtid string, lteid string) {
 	log.Debug("开始处理标讯信息~~~", gtid, "~", lteid)
@@ -23,15 +25,15 @@ func RunPurchasingInfo(gtid string, lteid string) {
 			"$lte": class.StringTOBsonId(lteid),
 		},
 	}
-	it := sess.DB(ul.Mgo.DbName).C("zktest_mysql_bidding").Find(&q).Sort("_id").Select(fields).Iter()
+	it := sess.DB(ul.Mgo.DbName).C(ul.S_Bidding_Coll).Find(&q).Sort("_id").Select(fields).Iter()
 	pool := make(chan bool, 10)
 	wg := &sync.WaitGroup{}
-	total := 0
+	total, isok := 0, 0
 	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
 		if total%100 == 0 {
-			log.Debug("cur index ", total)
+			log.Debug("cur index ", total, "~", isok)
 		}
-		if qu.IntAll(tmp["repeat"]) == 1 {
+		if qu.IntAll(tmp["extracttype"]) != 1 {
 			tmp = make(map[string]interface{})
 			continue
 		}
@@ -43,26 +45,30 @@ func RunPurchasingInfo(gtid string, lteid string) {
 				wg.Done()
 			}()
 			//构建标的物基本信息
-			createBaseInfo(tmp)
-
+			b := createBaseInfo(tmp)
+			if b {
+				numlock.Lock()
+				isok++
+				numlock.Unlock()
+			}
 		}(tmp)
 		tmp = make(map[string]interface{})
 	}
 	wg.Wait()
 
-	log.Debug("is over ", total)
+	log.Debug("is over ", total, "~", isok)
 }
 
 //构建标的物基本信息
-func createBaseInfo(tmp map[string]interface{}) {
-	p_list := isMarkInterfaceMap(tmp["yl_purchasinglist"])
-	sub_list := isMarkInterfaceArr(tmp["subscopeclass"])
+func createBaseInfo(tmp map[string]interface{}) bool {
+	p_list := IsMarkInterfaceMap(tmp["purchasinglist"])
+	sub_list := IsMarkInterfaceArr(tmp["subscopeclass"])
 	infoid := class.BsonTOStringId(tmp["_id"])
 	bid_topsubtype_code := confrimTopSubCode(qu.ObjToString(tmp["toptype"]), qu.ObjToString(tmp["subtype"]))
 	b, industry_code, new_plist := checkWhetherValidInfo(p_list, sub_list)
 	if b {
 		//标的物基本信息~记录标签
-		insertBaseInfo(new_plist, infoid, bid_topsubtype_code)
+		//insertBaseInfo(new_plist, infoid, bid_topsubtype_code)
 		//招标信息领域标签
 		//insertFiledTag(infoid)
 		//招标信息行业标签
@@ -71,13 +77,13 @@ func createBaseInfo(tmp map[string]interface{}) {
 	if industry_code != "" && len(new_plist) > 0 && bid_topsubtype_code != "" && infoid != "" {
 
 	}
-
+	return b
 }
 
 //return 是否有效~行业代码~新标的物信息
 func checkWhetherValidInfo(p_list []map[string]interface{}, sub_list []string) (bool, string, []map[string]interface{}) {
 	is_exists := false
-	industry_code, b := isIndustryInfo(sub_list)
+	industry_code, b := IsIndustryInfo(sub_list)
 	new_plist := []map[string]interface{}{}
 	if len(p_list) > 0 && b {
 		is_exists, new_plist = createNewPurchasingInfo(p_list)
@@ -136,7 +142,7 @@ func createNewPurchasingInfo(p_list []map[string]interface{}) (bool, []map[strin
 }
 
 //是否为医疗行业数据
-func isIndustryInfo(sub_list []string) (string, bool) {
+func IsIndustryInfo(sub_list []string) (string, bool) {
 	code := ""
 	b := false
 	for _, v := range sub_list {
@@ -179,7 +185,7 @@ func confrimTopSubCode(toptype string, subtype string) string {
 }
 
 //return
-func isMarkInterfaceMap(t interface{}) []map[string]interface{} {
+func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
 	p_list := []map[string]interface{}{}
 	if yl_list_1, ok_1 := t.(primitive.A); ok_1 {
 		p_list = qu.ObjArrToMapArr(yl_list_1)
@@ -192,7 +198,7 @@ func isMarkInterfaceMap(t interface{}) []map[string]interface{} {
 }
 
 //return
-func isMarkInterfaceArr(t interface{}) []string {
+func IsMarkInterfaceArr(t interface{}) []string {
 	sub_list := []string{}
 	if list_1, ok_1 := t.(primitive.A); ok_1 {
 		sub_list = qu.ObjArrToStringArr(list_1)

+ 10 - 9
fieldproject_medical/data_service/src/bidding/purchasing.go

@@ -15,11 +15,12 @@ func ConfrimTargetMedicalClass(name string) string {
 	//完全匹配校验
 	b, med_code := completeMatching(name)
 	if b {
+		//log.Debug("完全匹配:", name, "~", med_code)
 		return med_code
 	}
-	itemArr := ul.GSE.Cut(name, true)
 
 	//找到所有~词组的集合
+	itemArr := ul.GSE.Cut(name, true)
 	indexDocs := map[int][]string{}
 	for _, v := range itemArr {
 		if ul.NgrmText[v] != nil {
@@ -30,7 +31,6 @@ func ConfrimTargetMedicalClass(name string) string {
 			}
 		}
 	}
-
 	return calculateSimilarityScore(indexDocs, itemArr)
 }
 
@@ -54,9 +54,6 @@ func calculateSimilarityScore(indexDocs map[int][]string, itemArr []string) stri
 		//临时记录一下分数
 		scoreDocs_1[k] = qu.FloatFormat(base_score, 2)
 		scoreDocs_2[k] = qu.FloatFormat(dice_score, 2)
-		if finally_score > 1.0 {
-			break
-		}
 	}
 	//取出最高有效分~数据
 	index, score := getMaxScore(scoreDocs)
@@ -64,13 +61,17 @@ func calculateSimilarityScore(indexDocs map[int][]string, itemArr []string) stri
 	if score > 0.0 {
 		match_str = strings.Join(ul.NgrmDocIndex[index], "")
 	}
-	if scoreDocs_2[index] > 0.8 {
-		//log.Debug("相似匹配:", info_str, "~", match_str, "~", scoreDocs_1[index], "~", scoreDocs_2[index])
-	}
 
 	//分数~临界值校验
-	if score > 0.6 {
+	if score > 0.5 {
 		//log.Debug("相似匹配:", strings.Join(itemArr, ""), "~", match_str, "~", scoreDocs_1[index], "~", scoreDocs_2[index])
+		ul.Mgo.Save("zzzzzz", map[string]interface{}{
+			"name":       strings.Join(itemArr, ""),
+			"match_name": match_str,
+			"score":      score,
+			"score_1":    scoreDocs_1[index],
+			"score_2":    scoreDocs_2[index],
+		})
 		return ul.ProductDocText[match_str]
 	}
 	return ""

+ 46 - 0
fieldproject_medical/data_service/src/export/export.go

@@ -0,0 +1,46 @@
+package export
+
+import (
+	"bidding"
+	"class"
+	log "github.com/donnie4w/go-logger/logger"
+	qu "qfw/util"
+	ul "util"
+)
+
+// ExportBiddingTestData exports test data: it scans the "bidding" collection from _id 62e6a7000000000000000000 onward and saves medical-industry records with extracttype 1 and a non-empty purchasinglist into collection "zzzzzzzzz".
+func ExportBiddingTestData() {
+	sess := ul.Mgo.GetMgoConn()
+	defer ul.Mgo.DestoryMongoConn(sess)
+	// Temporary: take test data starting from August 1.
+	q := map[string]interface{}{
+		"_id": map[string]interface{}{
+			"$gte": class.StringTOBsonId("62e6a7000000000000000000"),
+		},
+	}
+	total, isok := 0, 0 // total: records scanned; isok: records exported
+	it := sess.DB(ul.Mgo.DbName).C("bidding").Find(&q).Sort("_id").Iter()
+	for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
+		if total%10000 == 0 {
+			log.Debug("cur index ", total, "~", isok)
+		}
+		if isok > 5000 { // checked before processing, so the scan stops once 5001 records have been saved
+			break
+		}
+		if qu.IntAll(tmp["extracttype"]) != 1 {
+			tmp = make(map[string]interface{})
+			continue
+		}
+		// Is this medical-industry data?
+		p_list := bidding.IsMarkInterfaceMap(tmp["purchasinglist"])
+		sub_list := bidding.IsMarkInterfaceArr(tmp["subscopeclass"])
+		_, b := bidding.IsIndustryInfo(sub_list)
+		if b && len(p_list) > 0 {
+			isok++
+			ul.Mgo.Save("zzzzzzzzz", tmp)
+		}
+		tmp = make(map[string]interface{})
+	}
+
+	log.Debug("is over ", total, "~", isok)
+}

+ 5 - 3
fieldproject_medical/data_service/src/main.go

@@ -1,6 +1,7 @@
 package main
 
 import (
+	"bidding"
 	log "github.com/donnie4w/go-logger/logger"
 	"net/http"
 	"service"
@@ -15,9 +16,10 @@ func init() {
 
 func main() {
 	log.Debug("run main ... ")
-	//bidding.RunPurchasingInfo(
-	//	"100000000000000000000000",
-	//	"900000000000000000000000")
+
+	bidding.RunPurchasingInfo(
+		"100000000000000000000000",
+		"900000000000000000000000")
 
 	//测试相似度计算
 	http.ListenAndServe(":9991", nil)

+ 18 - 16
fieldproject_medical/data_service/src/util/initcfg.go

@@ -15,6 +15,8 @@ const (
 	V_Code_Bidtopsubtype = "code_bidtopsubtype"
 	V_Code_Productclass  = "code_productclass"
 	V_Product_Baseinfo   = "product_baseinfo"
+
+	S_Bidding_Coll = "zktest_mysql_bidding_test"
 )
 
 var (
@@ -44,30 +46,30 @@ func InitClass() {
 //初始化mgo
 func initMgo() {
 	if isLocal {
-		//Save_Mgo = &MongodbSim{
-		//	MongodbAddr: "127.0.0.1:27017",
-		//	DbName:      "zhengkun",
-		//	Size:        10,
-		//	UserName:    "",
-		//	Password:    "",
-		//}
-		//Save_Mgo.InitPool()
-
 		Mgo = &MongodbSim{
-			MongodbAddr: "192.168.3.207:27092",
+			MongodbAddr: "127.0.0.1:27017",
 			DbName:      "zhengkun",
 			Size:        10,
 			UserName:    "",
 			Password:    "",
 		}
 		Mgo.InitPool()
+
+		//Mgo = &MongodbSim{
+		//	MongodbAddr: "192.168.3.207:27092",
+		//	DbName:      "zhengkun",
+		//	Size:        10,
+		//	UserName:    "",
+		//	Password:    "",
+		//}
+		//Mgo.InitPool()
 	} else {
 		Mgo = &MongodbSim{
-			MongodbAddr: "172.17.4.87:27080",
-			DbName:      "py_spider",
+			MongodbAddr: "172.17.145.163:27083,172.17.4.187:27082",
+			DbName:      "qfw",
 			Size:        10,
-			UserName:    "",
-			Password:    "",
+			UserName:    "zhengkun",
+			Password:    "zk@123123",
 		}
 		Mgo.InitPool()
 	}
@@ -78,7 +80,7 @@ func initMysql() {
 		Address:  "192.168.3.217:4000",
 		UserName: "root",
 		PassWord: "=PDT49#80Z!RVv52_z",
-		DBName:   "field_medical_data",
+		DBName:   "medical_fileld_data",
 	}
 	MysqlMedicalTool.Init()
 
@@ -86,7 +88,7 @@ func initMysql() {
 		Address:  "192.168.3.217:4000",
 		UserName: "root",
 		PassWord: "=PDT49#80Z!RVv52_z",
-		DBName:   "field_medical_data",
+		DBName:   "global_common_data",
 	}
 	MysqlGlobalTool.Init()
 

+ 1 - 1
fieldproject_medical/data_service/src/util/initvcode.go

@@ -7,7 +7,7 @@ import (
 
 //加载招标信息~分类
 func initBidInfoClass() {
-	data_types := MysqlMedicalTool.Find(V_Code_Bidtopsubtype, nil, "", "", -1, -1)
+	data_types := MysqlGlobalTool.Find(V_Code_Bidtopsubtype, nil, "", "", -1, -1)
 	//先构建所有一级数据
 	for _, v := range *data_types {
 		name := qu.ObjToString(v["name"])