소스 검색

抽取备份~站点表~加载每天更新

zhengkun 2 년 전
부모
커밋
3820c406a7
9개의 변경된 파일523개의 추가작업 그리고 169개의 파일을 삭제
  1. 2 0
      src/config.json
  2. 13 16
      src/jy/extract/extract.go
  3. 22 44
      src/jy/extract/extractInit.go
  4. 74 4
      src/jy/extract/extractcity_new.go
  5. 33 11
      src/jy/extract/extractcity_way.go
  6. 2 90
      src/jy/extract/extractudp.go
  7. 360 0
      src/jy/mongodbutil/mgo.go
  8. 13 4
      src/jy/util/util.go
  9. 4 0
      src/mark

+ 2 - 0
src/config.json

@@ -5,6 +5,8 @@
     "dbname": "extract_local",
     "dbname_addrs": "mixdata",
     "dbname_addrs_c": "address_new_2020",
+    "site_addr": "127.0.0.1:27017",
+    "site_dbname": "extract_local",
     "mergetable": "projectset",
     "mergetablealias": "projectset_v1",
     "ffield": true,

+ 13 - 16
src/jy/extract/extract.go

@@ -113,11 +113,8 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 	ext.InitClearFn(false)
 	ext.InitClearFn(true)
 	ext.Lock()
-	//ext.IsExtractCity = false
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
-		//初始化城市DFA信息
 		ext.InitCityInfo()
-		//ext.InitCityDFA()
 		ext.InitAreaCode()
 		ext.InitPostCode()
 	}
@@ -216,8 +213,6 @@ func StartExtractTaskId(taskId string) bool {
 	ext.InitClearFn(true)
 	ext.Lock()
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
-		//初始化城市DFA信息
-		//ext.InitCityDFA()
 		ext.InitCityInfo()
 		ext.InitAreaCode()
 		ext.InitPostCode()
@@ -2368,12 +2363,13 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		if ju.QualityAudit {
 			e.QualityAudit(tmp)
 		}
+		//落款识别
+		inscribeRecognize(&tmp, *j.Data)
+
 		//城市抽取
 		if e.IsExtractCity {
 			//e.NewExtractCity(j, &tmp)
-
 			e.ExtractRegionInfo(j, &tmp, true)
-
 		}
 		//品牌抽取
 		if ju.IsBrandGoods {
@@ -2794,23 +2790,24 @@ func checkFields(tmp map[string]interface{}, j_data map[string]interface{}) map[
 			tmp["nj_record"] = nj_record
 		}
 	}
+	return tmp
+}
 
+//落款识别~采购单位
+func inscribeRecognize(tmp *map[string]interface{}, j_data map[string]interface{}) {
 	//落款实体
-	if qu.ObjToString(tmp["buyer"]) == "" && ju.Inscribe &&
-		!(qu.ObjToString(tmp["toptype"]) == "拟建" && qu.ObjToString(tmp["subtype"]) == "拟建") {
+	if qu.ObjToString((*tmp)["buyer"]) == "" && ju.Inscribe &&
+		!(qu.ObjToString((*tmp)["toptype"]) == "拟建" && qu.ObjToString((*tmp)["subtype"]) == "拟建") {
 		new_buyer := InscribeEntity(qu.ObjToString(j_data["detail"]))
 		if new_buyer != "" {
-			tmp["buyer"] = new_buyer
+			(*tmp)["buyer"] = new_buyer
 		}
 	}
-
 	//拟建不能存buyer
-	if qu.ObjToString(tmp["toptype"]) == "拟建" &&
-		qu.ObjToString(tmp["subtype"]) == "拟建" {
-		delete(tmp, "buyer")
+	if qu.ObjToString((*tmp)["toptype"]) == "拟建" &&
+		qu.ObjToString((*tmp)["subtype"]) == "拟建" {
+		delete((*tmp), "buyer")
 	}
-
-	return tmp
 }
 
 func InscribeEntity(detail string) string {

+ 22 - 44
src/jy/extract/extractInit.go

@@ -1008,8 +1008,15 @@ func InitSite() []map[string]interface{} {
 			"$ne": "代理机构",
 		},
 	}
-	list, _ := db.Mgo.Find("site", query, nil, `{"site":1,"area":1,"city":1,"district":1}`, false, -1, -1)
-	return *list
+	list, _ := ju.Site_Mgo.Find("site", query, nil, map[string]interface{}{
+		"site":     1,
+		"area":     1,
+		"city":     1,
+		"district": 1,
+	})
+	return list
+	//list, _ := db.Mgo.Find("site", query, nil, `{"site":1,"area":1,"city":1,"district":1}`, false, -1, -1)
+	//return *list
 }
 
 //加载新疆兵团映射关系
@@ -1045,7 +1052,6 @@ func (e *ExtractTask) InitUpdateSite() {
 		}
 	}
 	log.Debug("有效站点数量:", len(e.SiteCityMap))
-
 }
 
 func (e *ExtractTask) InitCityInfo() {
@@ -1265,53 +1271,27 @@ func (e *ExtractTask) InitVar() {
 	e.Seg_PCD.LoadDict("./res/pcd.txt")
 	e.Seg_SV.LoadDict("./res/sv.txt")
 
+	//初始化城市相关
+	e.SiteCityMap = make(map[string]*SiteCity)
+	e.ProvinceMap = make(map[string]string)
+	e.CityMap = make(map[string]string)
+	e.DistrictSimAndAll = make(map[string][]map[string]*City)
+	e.CityBriefMap = make(map[string]*City)
+	e.CityFullMap = make(map[string]*City)
+	e.ProvinceBriefMap = make(map[string]*Province)
+	e.DistrictCityMap = make(map[string][]*City)
+	e.StreetDistrictMap = make(map[string][]*District)
 	//新疆兵团-数组
-	if e.XjbtCityArr == nil {
-		e.XjbtCityArr = make([]map[string]interface{}, 0)
-	}
-
+	e.XjbtCityArr = make([]map[string]interface{}, 0)
 	//敏感词-筛选
 	e.SensitiveFullCity = sensitive.New()
 	e.SensitiveSimCity = sensitive.New()
-
-	//初始化map
-	if e.SiteCityMap == nil {
-		e.SiteCityMap = make(map[string]*SiteCity)
-	}
-	if e.ProvinceMap == nil {
-		e.ProvinceMap = make(map[string]string)
-	}
-	if e.CityMap == nil {
-		e.CityMap = make(map[string]string)
-	}
-
-	if e.DistrictSimAndAll == nil {
-		e.DistrictSimAndAll = make(map[string][]map[string]*City)
-	}
-	if e.CityBriefMap == nil {
-		e.CityBriefMap = make(map[string]*City)
-	}
-	if e.CityFullMap == nil {
-		e.CityFullMap = make(map[string]*City)
-	}
-	if e.ProvinceBriefMap == nil {
-		e.ProvinceBriefMap = make(map[string]*Province)
-	}
-	if e.DistrictCityMap == nil {
-		e.DistrictCityMap = make(map[string][]*City)
-	}
-	if e.StreetDistrictMap == nil {
-		e.StreetDistrictMap = make(map[string][]*District)
-	}
-
 }
 
 //初始化邮编库
 func (e *ExtractTask) InitPostCode() {
 	defer qu.Catch()
-	if e.PostCodeMap == nil {
-		e.PostCodeMap = make(map[string]*PostCode)
-	}
+	e.PostCodeMap = make(map[string]*PostCode)
 	list, _ := db.Mgo.Find("postcode", nil, nil, nil, false, -1, -1)
 	for _, l := range *list {
 		pc := &PostCode{}
@@ -1326,9 +1306,7 @@ func (e *ExtractTask) InitPostCode() {
 //初始化区号库
 func (e *ExtractTask) InitAreaCode() {
 	defer qu.Catch()
-	if e.AreaCodeMap == nil {
-		e.AreaCodeMap = make(map[string]*AreaCode)
-	}
+	e.AreaCodeMap = make(map[string]*AreaCode)
 	list, _ := db.Mgo.Find("areacode", nil, nil, nil, false, -1, -1)
 	for _, l := range *list {
 		ac := &AreaCode{}

+ 74 - 4
src/jy/extract/extractcity_new.go

@@ -10,6 +10,14 @@ import (
 //抽取地域信息
 func (e *ExtractTask) ExtractRegionInfo(j *ju.Job, tmp *map[string]interface{}, isLog bool) {
 	defer qu.Catch()
+
+	/*
+		调整逻辑~ 2022.12.6
+			1、标题组,若之前未抽取到值,区县不采用简称
+			2、站点匹配的地域~标准化校验
+			3、编写脚本,每天同步更新站点表信息
+	*/
+
 	//日志记录
 	logRecordInfo := []map[string]interface{}{}
 	f_area, f_city, f_district := "", "", ""
@@ -35,7 +43,7 @@ func (e *ExtractTask) ExtractRegionInfo(j *ju.Job, tmp *map[string]interface{},
 		(*tmp)["regions_log"] = logRecordInfo
 		return
 	}
-	//638988a7911e1eb34509c209  功能存在缺陷
+
 	//字段可控
 	CityFieldsArr := []string{
 		"projectaddr,addressing",
@@ -52,7 +60,11 @@ func (e *ExtractTask) ExtractRegionInfo(j *ju.Job, tmp *map[string]interface{},
 		if !isContinue {
 			continue
 		}
-		field_regions, old_regions, new_regions := e.GetRegionByGroupInfo(keyArr, *tmp)
+		isUseful := false //当前组提取前~是否有值
+		if len(all_regions) > 0 {
+			isUseful = true
+		}
+		field_regions, old_regions, new_regions := e.GetRegionByGroupInfo(keyArr, isUseful, *tmp)
 		AnalysisIsUniqueInfo(new_regions, &all_regions)
 		if isLog { //日志记录
 			LogProcessRecordingForGroupInfo(strings.Join(keyArr, "_"), textValues, field_regions, old_regions, all_regions, &logRecordInfo)
@@ -100,7 +112,7 @@ func (e *ExtractTask) ExtractRegionInfo(j *ju.Job, tmp *map[string]interface{},
 }
 
 //对组进行分析处理
-func (e *ExtractTask) GetRegionByGroupInfo(keyArr []string, tmp map[string]interface{}) (map[string]interface{}, map[string]map[string]map[string]string, map[string]map[string]map[string]string) {
+func (e *ExtractTask) GetRegionByGroupInfo(keyArr []string, isUseful bool, tmp map[string]interface{}) (map[string]interface{}, map[string]map[string]map[string]string, map[string]map[string]map[string]string) {
 	old_regions := map[string]map[string]map[string]string{}
 	textArr := []string{}
 	field_regions := map[string]interface{}{}
@@ -113,7 +125,12 @@ func (e *ExtractTask) GetRegionByGroupInfo(keyArr []string, tmp map[string]inter
 		} else if key == "buyertel" {
 			valuesArr = e.GetRegionByTelNumber(text, &old_regions)
 		} else {
-			valuesArr = e.GetRegionFromText(text, &old_regions, 2)
+			//如果为标题或者项目名称
+			if (key == "title" || key == "projectname") && !isUseful {
+				valuesArr = e.GetRegionFromTPText(text, &old_regions)
+			} else {
+				valuesArr = e.GetRegionFromText(text, &old_regions, 2)
+			}
 		}
 		field_regions[key] = valuesArr
 	}
@@ -224,6 +241,10 @@ func (e *ExtractTask) GetRegionByTentativeSite(j *ju.Job, all_regions *map[strin
 			district = scMap.D
 		}
 	}
+
+	//对省市区进行标准化校验~简称全程的问题
+	e.StandardizedegionInfo(&area, &city, &district)
+
 	//取出唯一数据
 	j_area, j_city, j_district := "", "", ""
 	is_adjust := false
@@ -337,3 +358,52 @@ func TextGroupInfo(keyArr []string, tmp map[string]interface{}) (bool, []string)
 	}
 	return isvalid, dataArr
 }
+
+//简称全程标准化的校验~
+func (e *ExtractTask) StandardizedegionInfo(area *string, city *string, district *string) {
+	//特殊市补充
+	if *area == "北京" {
+		*city = "北京市"
+	} else if *area == "天津" {
+		*city = "天津市"
+	} else if *area == "上海" {
+		*city = "上海市"
+	} else if *area == "重庆" {
+		*city = "重庆市"
+	}
+	//非空与空~是否标准校验
+	if *area == "" {
+		*city = ""
+		*district = ""
+	} else {
+		if province := e.ProvinceMap[*area]; province != "" {
+			*area = province
+		}
+		if *city == "" {
+			*district = ""
+		} else {
+			if csMap := e.CityBriefMap[*city]; csMap != nil {
+				if csMap.P.Brief == *area && csMap.Name != "" {
+					*city = csMap.Name
+				} else {
+					*city = ""
+					*district = ""
+				}
+			}
+			if *district != "" {
+				citysArr := e.DistrictSimAndAll[*district]
+				if len(citysArr) == 1 {
+					full_city := citysArr[0]
+					for d, _ := range full_city {
+						*district = d
+					}
+				} else if len(citysArr) > 1 {
+					*district = ""
+				} else {
+
+				}
+			}
+		}
+	}
+
+}

+ 33 - 11
src/jy/extract/extractcity_way.go

@@ -49,8 +49,8 @@ func CompleteRegionInfo(area *string, city *string, district *string) {
 	}
 }
 
-//根据词获取所有的地域
-func (e *ExtractTask) takeRegionsFromWords(text string) []map[string]string {
+//根据词获取所有的地域 is_brief_district 是否进行简称区划提取
+func (e *ExtractTask) takeRegionsFromWords(text string, is_brief_district bool) []map[string]string {
 	regions := []map[string]string{}
 	//全称匹配
 	for pos_full, trie_full := range e.Trie_Fulls {
@@ -89,14 +89,16 @@ func (e *ExtractTask) takeRegionsFromWords(text string) []map[string]string {
 					}
 				}
 			} else if pos_sim == 2 {
-				citysArr := e.DistrictSimAndAll[text]
-				for _, full_citys := range citysArr {
-					for district, c := range full_citys {
-						if c == nil || c.P == nil || c.Name == "" {
-							continue
-						}
-						if c.P.Brief != "" && c.Name != "" && district != "" {
-							regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": district})
+				if is_brief_district {
+					citysArr := e.DistrictSimAndAll[text]
+					for _, full_citys := range citysArr {
+						for district, c := range full_citys {
+							if c == nil || c.P == nil || c.Name == "" {
+								continue
+							}
+							if c.P.Brief != "" && c.Name != "" && district != "" {
+								regions = append(regions, map[string]string{"area": c.P.Brief, "city": c.Name, "district": district})
+							}
 						}
 					}
 				}
@@ -119,7 +121,27 @@ func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[str
 		wordsArr = e.Seg_SV.Cut(text, true)
 	}
 	for _, word := range wordsArr {
-		regionArr := e.takeRegionsFromWords(word)
+		regionArr := e.takeRegionsFromWords(word, true)
+		for _, v := range regionArr {
+			area := qu.ObjToString(v["area"])
+			city := qu.ObjToString(v["city"])
+			district := qu.ObjToString(v["district"])
+			UpdateRegionsInfo(area, city, district, regions)
+			regionValues = append(regionValues, map[string]interface{}{"area": area, "city": city, "district": district})
+		}
+	}
+	return regionValues
+}
+
+func (e *ExtractTask) GetRegionFromTPText(text string, regions *map[string]map[string]map[string]string) []map[string]interface{} {
+	regionValues := []map[string]interface{}{}
+	if text == "" {
+		return regionValues
+	}
+	wordsArr := []string{}
+	wordsArr = e.Seg_SV.Cut(text, true)
+	for _, word := range wordsArr {
+		regionArr := e.takeRegionsFromWords(word, false)
 		for _, v := range regionArr {
 			area := qu.ObjToString(v["area"])
 			city := qu.ObjToString(v["city"])

+ 2 - 90
src/jy/extract/extractudp.go

@@ -142,10 +142,7 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 		ext.InitClearFn(false)
 		ext.InitClearFn(true)
 		ext.Lock()
-		//ext.IsExtractCity = false
 		if ext.IsExtractCity { //版本上控制是否开始城市抽取
-			//初始化城市DFA信息
-			//ext.InitCityDFA()
 			ext.InitCityInfo()
 			ext.InitAreaCode()
 			ext.InitPostCode()
@@ -156,44 +153,19 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 		ext.InitAuditRule()
 		ext.InitAuditClass()
 		ext.InitAuditRecogField()
-
 		//品牌抽取是否开启
 		ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)
-
 		ext.ResultSave(true)
 		ext.BidSave(true)
 		ext.IsRun = true
 		ext.InitFile()
 	} else {
-		ext.BidTotal = 0 //是否更新站点数据~~~
+		ext.BidTotal = 0
 		if ju.IsUpdateSite && ext.IsExtractCity {
+			log.Debug()
 			ext.InitUpdateSite()
 			ju.IsUpdateSite = false
 		}
-		//更新规则~标签~~
-		if ju.IsUpdateRuleTag {
-			ju.IsUpdateRuleTag = false
-			ext.InitRulePres()
-			ext.InitRuleBacks(false)
-			ext.InitRuleBacks(true)
-			ext.InitRuleCore(false)
-			ext.InitRuleCore(true)
-			ext.InitBlockRule()
-			ext.InitPkgCore()
-			ext.InitTag(false)
-			ext.InitTag(true)
-			ext.InitClearFn(false)
-			ext.InitClearFn(true)
-			ext.Lock()
-			if ext.IsExtractCity { //版本上控制是否开始城市抽取
-				//初始化城市DFA信息
-				//ext.InitCityDFA()
-				ext.InitCityInfo()
-				ext.InitAreaCode()
-				ext.InitPostCode()
-			}
-			ext.Unlock()
-		}
 	}
 	index := 0
 	if len(instanceId) > 0 { //分布式抽取进度
@@ -323,63 +295,3 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 
 //中标预测信息抽取,ossid为附件识别后的id
 var exF *ExtractTask
-
-func ExtractByBidForecast(infoid string, ossid ...string) map[string]interface{} {
-	defer qu.Catch()
-	if exF == nil {
-		exF = &ExtractTask{}
-		exF.Id = qu.ObjToString(ju.Config["udptaskid"])
-		exF.InitTaskInfo()
-		exF.TaskInfo.FDB = db.MgoFactory(1, 2, 600, exF.TaskInfo.FromDbAddr, exF.TaskInfo.FromDB)
-		exF.TaskInfo.TDB = db.MgoFactory(1, 2, 600, exF.TaskInfo.ToDbAddr, exF.TaskInfo.ToDB)
-		exF.InitSite()
-		exF.InitRulePres()
-		exF.InitRuleBacks(false)
-		exF.InitRuleBacks(true)
-		exF.InitRuleCore(false)
-		exF.InitRuleCore(true)
-		exF.InitBlockRule()
-		exF.InitPkgCore()
-		exF.InitTag(false)
-		exF.InitTag(true)
-		exF.InitClearFn(false)
-		exF.InitClearFn(true)
-
-		if exF.IsExtractCity { //版本上控制是否开始城市抽取
-			//初始化城市DFA信息
-			//exF.InitCityDFA()
-			exF.InitCityInfo()
-			exF.InitAreaCode()
-			exF.InitPostCode()
-		}
-		//质量审核
-		exF.InitAuditFields()
-		exF.InitAuditRule()
-		exF.InitAuditClass()
-		exF.InitAuditRecogField()
-
-		//品牌抽取是否开启
-		ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)
-
-		exF.ResultSave(true)
-		exF.BidSave(true)
-		exF.IsRun = true
-		exF.InitFile()
-	}
-	tmp, _ := exF.TaskInfo.FDB.FindById(exF.TaskInfo.FromColl, infoid, nil)
-	if exF.IsFileField && ((*tmp)["projectinfo"] != nil || (*tmp)["attach_text"] != nil) {
-		(*tmp)["isextFile"] = true
-	}
-	exF.TaskInfo.ProcessPool <- true
-	j, jf, _ := exF.PreInfo(*tmp)
-	wg := sync.WaitGroup{}
-	wg.Add(1)
-	go func(wg *sync.WaitGroup, j, jf *ju.Job) {
-		defer wg.Done()
-		exF.ExtractProcess(j, jf, false)
-	}(&wg, j, jf)
-	wg.Wait()
-	exF.BidSave(false)
-
-	return nil
-}

+ 360 - 0
src/jy/mongodbutil/mgo.go

@@ -0,0 +1,360 @@
+package mongodbutil
+
+import (
+	"context"
+	"log"
+	"runtime"
+	"time"
+
+	"go.mongodb.org/mongo-driver/bson"
+	"go.mongodb.org/mongo-driver/bson/primitive"
+	"go.mongodb.org/mongo-driver/mongo"
+	"go.mongodb.org/mongo-driver/mongo/options"
+)
+
+type MgoSess struct {
+	Db     string
+	Coll   string
+	Query  interface{}
+	Sorts  []string
+	fields interface{}
+	limit  int64
+	skip   int64
+	M      *MongodbSim
+}
+
+type MgoIter struct {
+	Cursor *mongo.Cursor
+}
+
+func (mt *MgoIter) Next(result interface{}) bool {
+	if mt.Cursor != nil {
+		if mt.Cursor.Next(nil) {
+			err := mt.Cursor.Decode(result)
+			if err != nil {
+				log.Println("mgo cur err", err.Error())
+				mt.Cursor.Close(nil)
+				return false
+			}
+			return true
+		} else {
+			mt.Cursor.Close(nil)
+			return false
+		}
+	} else {
+		return false
+	}
+
+}
+
+func (ms *MgoSess) DB(name string) *MgoSess {
+	ms.Db = name
+	return ms
+}
+
+func (ms *MgoSess) C(name string) *MgoSess {
+	ms.Coll = name
+	return ms
+}
+
+func (ms *MgoSess) Find(q interface{}) *MgoSess {
+	ms.Query = q
+	return ms
+}
+
+func (ms *MgoSess) Select(fields interface{}) *MgoSess {
+	ms.fields = fields
+	return ms
+}
+
+func (ms *MgoSess) Limit(limit int64) *MgoSess {
+	ms.limit = limit
+	return ms
+}
+func (ms *MgoSess) Skip(skip int64) *MgoSess {
+	ms.skip = skip
+	return ms
+}
+
+func (ms *MgoSess) Sort(sorts ...string) *MgoSess {
+	ms.Sorts = sorts
+	return ms
+}
+
+func (ms *MgoSess) Iter() *MgoIter {
+	it := &MgoIter{}
+	find := options.Find()
+	if ms.skip > 0 {
+		find.SetSkip(ms.skip)
+	}
+	if ms.limit > 0 {
+		find.SetLimit(ms.limit)
+	}
+	find.SetBatchSize(100)
+	if len(ms.Sorts) > 0 {
+		sort := bson.M{}
+		for _, k := range ms.Sorts {
+			switch k[:1] {
+			case "-":
+				sort[k[1:]] = -1
+			case "+":
+				sort[k[1:]] = 1
+			default:
+				sort[k] = 1
+			}
+		}
+		find.SetSort(sort)
+	}
+	if ms.fields != nil {
+		find.SetProjection(ms.fields)
+	}
+	cur, err := ms.M.C.Database(ms.Db).Collection(ms.Coll).Find(ms.M.Ctx, ms.Query, find)
+	if err != nil {
+		log.Println("mgo find err", err.Error())
+	} else {
+		it.Cursor = cur
+	}
+	return it
+}
+
+type MongodbSim struct {
+	MongodbAddr string
+	Size        int
+	//	MinSize     int
+	DbName   string
+	C        *mongo.Client
+	Ctx      context.Context
+	ShortCtx context.Context
+	pool     chan bool
+	UserName string
+	Password string
+}
+
+func (m *MongodbSim) GetMgoConn() *MgoSess {
+	//m.Open()
+	ms := &MgoSess{}
+	ms.M = m
+	return ms
+}
+
+func (m *MongodbSim) DestoryMongoConn(ms *MgoSess) {
+	//m.Close()
+	ms.M = nil
+	ms = nil
+}
+
+func (m *MongodbSim) InitPool() {
+	opts := options.Client()
+	opts.SetConnectTimeout(3 * time.Second)
+	opts.ApplyURI("mongodb://" + m.MongodbAddr)
+	opts.SetMaxPoolSize(uint64(m.Size))
+	m.pool = make(chan bool, m.Size)
+
+	if m.UserName != "" && m.Password != "" {
+		cre := options.Credential{
+			Username: m.UserName,
+			Password: m.Password,
+		}
+		opts.SetAuth(cre)
+	}
+
+	opts.SetMaxConnIdleTime(2 * time.Hour)
+	m.Ctx, _ = context.WithTimeout(context.Background(), 99999*time.Hour)
+	m.ShortCtx, _ = context.WithTimeout(context.Background(), 1*time.Minute)
+	client, err := mongo.Connect(m.ShortCtx, opts)
+	if err != nil {
+		log.Println("mgo init error:", err.Error())
+	} else {
+		m.C = client
+		log.Println("init success")
+	}
+}
+
+func (m *MongodbSim) Open() {
+	m.pool <- true
+}
+func (m *MongodbSim) Close() {
+	<-m.pool
+}
+
+//批量插入
+func (m *MongodbSim) UpSertBulk(c string, doc ...[]map[string]interface{}) (map[int64]interface{}, bool) {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	var writes []mongo.WriteModel
+	for _, d := range doc {
+		write := mongo.NewUpdateOneModel()
+		write.SetFilter(d[0])
+		write.SetUpdate(d[1])
+		write.SetUpsert(true)
+		writes = append(writes, write)
+	}
+	r, e := coll.BulkWrite(m.Ctx, writes)
+	if e != nil {
+		log.Println("mgo upsert error:", e.Error())
+		return nil, false
+	}
+	//	else {
+	//		if r.UpsertedCount != int64(len(doc)) {
+	//			log.Println("mgo upsert uncomplete:uc/dc", r.UpsertedCount, len(doc))
+	//		}
+	//		return true
+	//	}
+	return r.UpsertedIDs, true
+}
+
+//批量插入
+func (m *MongodbSim) SaveBulk(c string, doc ...map[string]interface{}) bool {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	var writes []mongo.WriteModel
+	for _, d := range doc {
+		write := mongo.NewInsertOneModel()
+		write.SetDocument(d)
+		writes = append(writes, write)
+	}
+	_, e := coll.BulkWrite(m.Ctx, writes)
+	if e != nil {
+		log.Println("mgo savebulk error:", e.Error())
+		return false
+	}
+	return true
+}
+
+//保存
+func (m *MongodbSim) Save(c string, doc map[string]interface{}) interface{} {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	r, err := coll.InsertOne(m.Ctx, doc)
+	if err != nil {
+		return nil
+	}
+	return r.InsertedID
+}
+
+//按条件更新
+func (m *MongodbSim) Update(c string, q, u interface{}, upsert bool, multi bool) bool {
+	defer catch()
+	m.Open()
+	defer m.Close()
+	ct := options.Update()
+	if upsert {
+		ct.SetUpsert(true)
+	}
+	coll := m.C.Database(m.DbName).Collection(c)
+	var err error
+	if multi {
+		_, err = coll.UpdateMany(m.Ctx, ObjToM(q), ObjToM(u), ct)
+	} else {
+		_, err = coll.UpdateOne(m.Ctx, ObjToM(q), ObjToM(u), ct)
+	}
+	if err != nil {
+		log.Println("删除错误", err.Error())
+		return false
+	}
+	return true
+}
+
+//更新by Id
+func (m *MongodbSim) UpdateById(c, id string, doc map[string]interface{}) bool {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	_, err := coll.UpdateOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)}, doc)
+	if err != nil {
+		log.Println(err)
+		return false
+	}
+	return true
+}
+
+//删除by id
+func (m *MongodbSim) DeleteById(c, id string) int64 {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	r, err := coll.DeleteOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)})
+	if err != nil {
+		return 0
+	}
+	return r.DeletedCount
+}
+
+//通过条件删除
+func (m *MongodbSim) Delete(c string, query map[string]interface{}) int64 {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	r, err := coll.DeleteMany(m.Ctx, query)
+	if err != nil {
+		return 0
+	}
+	return r.DeletedCount
+}
+
+//findbyid
+func (m *MongodbSim) FindById(c, id string) map[string]interface{} {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	r := coll.FindOne(m.Ctx, map[string]interface{}{"_id": StringTOBsonId(id)})
+	v := map[string]interface{}{}
+	r.Decode(&v)
+	return v
+}
+
+//findone
+func (m *MongodbSim) FindOne(c string, query map[string]interface{}) map[string]interface{} {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	r := coll.FindOne(m.Ctx, query)
+	v := map[string]interface{}{}
+	r.Decode(&v)
+	return v
+}
+
+//find
+func (m *MongodbSim) Find(c string, query map[string]interface{}, sort, fields interface{}) ([]map[string]interface{}, error) {
+	m.Open()
+	defer m.Close()
+	coll := m.C.Database(m.DbName).Collection(c)
+	op := options.Find()
+	r, err := coll.Find(m.Ctx, query, op.SetSort(sort), op.SetProjection(fields))
+	if err != nil {
+		log.Fatal(err)
+		return nil, err
+	}
+	var results []map[string]interface{}
+	if err = r.All(m.Ctx, &results); err != nil {
+		log.Fatal(err)
+		return nil, err
+	}
+	return results, nil
+}
+
+func StringTOBsonId(id string) primitive.ObjectID {
+	objectId, _ := primitive.ObjectIDFromHex(id)
+	return objectId
+}
+
+func BsonTOStringId(id interface{}) string {
+	return id.(primitive.ObjectID).Hex()
+}
+
+//出错拦截
+func catch() {
+	if r := recover(); r != nil {
+		log.Println(r)
+		for skip := 0; ; skip++ {
+			_, file, line, ok := runtime.Caller(skip)
+			if !ok {
+				break
+			}
+			go log.Printf("%v,%v\n", file, line)
+		}
+	}
+}

+ 13 - 4
src/jy/util/util.go

@@ -40,11 +40,10 @@ var BrandGet *DFA     //品牌
 var IsBrandGoods bool //是否开启品牌抽取
 
 var SaveResult, FieldsFind, IsSaveTag, SaveBlock, QualityAudit, Ffield, Inscribe bool
-var AddrsSess *mgo.Collection
-var QyxySess *mgo.Collection
+var AddrsSess, QyxySess *mgo.Collection
+var Site_Mgo *MongodbSim
 
 var IsUpdateSite bool
-var IsUpdateRuleTag bool
 
 func init() {
 	syncint = make(chan bool, 1)
@@ -58,6 +57,16 @@ func UtilInit() {
 	AddrsSess = Mgo.Get().DB(qu.ObjToString(Config["dbname_addrs"])).C(qu.ObjToString(Config["dbname_addrs_c"]))
 	QyxySess = Mgo.Get().DB(qu.ObjToString(Config["dbname_addrs"])).C("qyxy_std")
 
+	//站点爬虫库
+	Site_Mgo = &MongodbSim{
+		MongodbAddr: qu.ObjToString(Config["site_addr"]),
+		DbName:      qu.ObjToString(Config["site_dbname"]),
+		Size:        3,
+		UserName:    "",
+		Password:    "",
+	}
+	Site_Mgo.InitPool()
+
 	SaveResult, _ = Config["saveresult"].(bool)
 	FieldsFind, _ = Config["fieldsfind"].(bool)
 	IsSaveTag, _ = Config["iscltlog"].(bool)
@@ -73,7 +82,7 @@ func UtilInit() {
 	//定时更新站点信息
 	IsUpdateSite = false
 	c := cron.New()
-	c.AddFunc("0 0 1 ? * WED", func() {
+	c.AddFunc("0 0 8 * * ?", func() {
 		IsUpdateSite = true
 	})
 	c.Start()

+ 4 - 0
src/mark

@@ -6,6 +6,8 @@
     "dbname": "extract_2021",
     "dbname_addrs": "mixdata",
     "dbname_addrs_c": "address_new_2020",
+    "site_addr": "172.17.4.87:27080",
+    "site_dbname": "editor",
     "redis": "qyk_redis=172.17.4.87:1479",
     "elasticsearch": "http://172.17.145.170:9800",
     "elasticsearch_index": "winner_enterprise",
@@ -80,6 +82,8 @@
     "dbname": "extract_2021",
     "dbname_addrs": "mixdata",
     "dbname_addrs_c": "address_new_2020",
+    "site_addr": "172.17.4.87:27080",
+    "site_dbname": "editor",
     "redis":"buyer=172.17.4.87:2679,winner=172.17.4.87:2679,agency=172.17.4.87:2679,qyk_redis=172.17.4.87:1479",
     "redis_qyk": "qyk_redis",
     "redis_winner_db": "1",