Sfoglia il codice sorgente

1、调整结果追踪不进行地域加载(除非重启)
2、三大运营商前置条件(补充校验)
3、定时每天8点后更新规则以及站点相关信息

zhengkun 1 anno fa
parent
commit
67fa3f00fa

+ 2 - 2
src/jy/extract/extract.go

@@ -16,7 +16,7 @@ import (
 	"unicode/utf8"
 )
 
-// 启动测试抽取-、、、、结果追踪
+// 结果追踪调试
 func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bool {
 	defer qu.Catch()
 	ext := TaskList[taskId]
@@ -41,7 +41,7 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 	ext.InitClearFn(false)
 	ext.InitClearFn(true)
 	ext.Lock()
-	if ext.IsExtractCity { //版本上控制是否开始城市抽取
+	if ext.IsExtractCity && ext.ProvinceMap == nil { //版本上控制是否开始城市抽取
 		ext.InitCityInfo()
 		ext.InitAreaCode()
 		ext.InitPostCode()

+ 79 - 10
src/jy/extract/extractcity_new.go

@@ -7,7 +7,7 @@ import (
 	"strings"
 )
 
-//标准化校验后存值
+// 标准化校验后存值
 func (e *ExtractTask) ExtractRegionClean(tmp *map[string]interface{}) {
 	//标准化校验
 	update_check := make(map[string]interface{}, 0)
@@ -22,7 +22,7 @@ func (e *ExtractTask) ExtractRegionClean(tmp *map[string]interface{}) {
 	}
 }
 
-//抽取地域信息
+// 抽取地域信息
 func (e *ExtractTask) ExtractRegionInfo(j *ju.Job, tmp *map[string]interface{}, isLog bool) {
 	defer qu.Catch()
 	//日志记录
@@ -46,6 +46,25 @@ func (e *ExtractTask) ExtractRegionInfo(j *ju.Job, tmp *map[string]interface{},
 		(*tmp)["regions_log"] = logRecordInfo
 		return
 	}
+
+	//是否三大运营商-前置条件2
+	e.GetRegionByTentativeOperator(qu.ObjToString((*tmp)["winner"]), &all_regions)
+	if isLog && len(all_regions) > 0 {
+		valueArr := []string{}
+		valueArr = append(valueArr, qu.ObjToString((*tmp)["winner"]))
+		LogProcessRecordingForTentative("运营商", valueArr, all_regions, &logRecordInfo)
+	}
+	b = ConfirmUniqueRegionInfo(all_regions, &f_area, &f_city, &f_district)
+	if b {
+		CompleteRegionInfo(&f_area, &f_city, &f_district)
+		//最终赋值
+		(*tmp)["area"] = f_area
+		(*tmp)["city"] = f_city
+		(*tmp)["district"] = f_district
+		(*tmp)["regions_log"] = logRecordInfo
+		return
+	}
+
 	//字段可控
 	RegionFieldsArr := ju.DefaultRegions
 	//采购单位比较特殊~需要根据站点类型进行重新组合
@@ -146,7 +165,7 @@ func (e *ExtractTask) ExtractRegionInfo(j *ju.Job, tmp *map[string]interface{},
 	(*tmp)["regions_log"] = logRecordInfo
 }
 
-//对组进行分析处理
+// 对组进行分析处理
 func (e *ExtractTask) GetRegionByGroupInfo(keyArr []string, tmp map[string]interface{}) (bool, []string, map[string]interface{}, map[string]map[string]map[string]string, map[string]map[string]map[string]string) {
 	old_regions := map[string]map[string]map[string]string{}
 	isExists := false
@@ -195,7 +214,7 @@ func (e *ExtractTask) GetRegionByGroupInfo(keyArr []string, tmp map[string]inter
 	return isExists, textArr, field_regions, old_regions, new_regions
 }
 
-//邮政编号
+// 邮政编号
 func (e *ExtractTask) GetRegionByPostCode(text string, regions *map[string]map[string]map[string]string) []map[string]interface{} {
 	regionsArr := []map[string]interface{}{}
 	pc := e.PostCodeMap[text]
@@ -211,7 +230,7 @@ func (e *ExtractTask) GetRegionByPostCode(text string, regions *map[string]map[s
 	return regionsArr
 }
 
-//固话号码
+// 固话号码
 func (e *ExtractTask) GetRegionByTelNumber(text string, regions *map[string]map[string]map[string]string) []map[string]interface{} {
 	regionsArr := []map[string]interface{}{}
 	if len(text) >= 11 {
@@ -239,7 +258,7 @@ func (e *ExtractTask) GetRegionByTelNumber(text string, regions *map[string]map[
 	return regionsArr
 }
 
-//初步确认~采集
+// 初步确认~采集
 func (e *ExtractTask) GetRegionByTentativeJsonData(j *ju.Job, all_regions *map[string]map[string]map[string]string) {
 	area, city, district := "", "", ""
 	regions := map[string]map[string]map[string]string{}
@@ -281,7 +300,7 @@ func (e *ExtractTask) GetRegionByTentativeJsonData(j *ju.Job, all_regions *map[s
 	}
 }
 
-//简称全程标准化的校验~
+// 简称全称标准化的校验~
 func (e *ExtractTask) StandardizedegionInfo(area *string, city *string, district *string) {
 	//特殊市补充
 	if *area == "北京" {
@@ -339,7 +358,7 @@ func (e *ExtractTask) StandardizedegionInfo(area *string, city *string, district
 	}
 }
 
-//站点取值   from 1-省  2-省市
+// 站点取值   from 1-省  2-省市
 func (e *ExtractTask) GetRegionBySite(site string, regions *map[string]map[string]map[string]string, from int) []map[string]interface{} {
 	regionArr := []map[string]interface{}{}
 	area, city, district := "", "", ""
@@ -365,7 +384,7 @@ func (e *ExtractTask) GetRegionBySite(site string, regions *map[string]map[strin
 	return regionArr
 }
 
-//新疆兵团
+// 新疆兵团
 func (e *ExtractTask) NewVerifyXjCorpsInfo(buyer string) (new_a, new_c, new_d string, ok bool) {
 	buyer = strings.ReplaceAll(buyer, "新疆兵团", "新疆生产建设兵团")
 	ok = false
@@ -393,7 +412,7 @@ func (e *ExtractTask) NewVerifyXjCorpsInfo(buyer string) (new_a, new_c, new_d st
 	return new_a, new_c, new_d, ok
 }
 
-//敏感词识别
+// 敏感词识别
 func (e *ExtractTask) NewVerifySensitiveInfo(detail string, area *string, city *string, district *string) bool {
 	detail = SensitiveReg.ReplaceAllString(detail, "")
 	detail = TextAfterRemoveTable(detail)
@@ -480,6 +499,56 @@ func resetFixedTelInfo(telArr []string) []string {
 	return codeArr
 }
 
+// GetRegionByTentativeOperator is the tentative "operator" step: when winner matches OperatorReg, regions are extracted from the winner text and, if exactly one province was found, merged into all_regions unless they conflict with entries already present.
+func (e *ExtractTask) GetRegionByTentativeOperator(winner string, all_regions *map[string]map[string]map[string]string) {
+	area, city, district := "", "", ""
+	regions := map[string]map[string]map[string]string{}
+	if OperatorReg.MatchString(winner) { // only winners matching OperatorReg are scanned for regions
+		e.GetRegionFromText(winner, &regions, false, false, 2) // isAddress=false, isBrief=false, from=2 ("other text" per GetRegionFromText's comment)
+	}
+	if len(regions) == 1 { // a province is kept only when exactly one was found
+		for k, v := range regions {
+			area = k
+			if len(v) == 1 { // a city is kept only when it is the single one under that province
+				for k1, v1 := range v {
+					city = k1
+					if len(v1) == 1 { // a district is kept only when it is the single one under that city
+						for k2, _ := range v1 {
+							district = k2
+						}
+					} else {
+						break
+					}
+				}
+			} else {
+				break
+			}
+		}
+	}
+	if area != "" { // assemble the structure
+		// discard the operator data: all_regions already has entries, but none for this area
+		if (*all_regions)[area] == nil && len((*all_regions)) > 0 {
+			return
+		}
+		city_info := map[string]map[string]string{}
+		if (*all_regions)[area] != nil {
+			city_info = (*all_regions)[area] // reuse the existing city map, so writes below modify all_regions in place
+		}
+		district_info := map[string]string{}
+		if city != "" {
+			// discard the operator data: this area already has cities, but not this one
+			if city_info[city] == nil && len(city_info) > 0 {
+				return
+			}
+			if district != "" {
+				district_info[district] = district
+			}
+			city_info[city] = district_info // NOTE(review): replaces any district map already stored for this city, even when district is empty — confirm this is intended
+		}
+		(*all_regions)[area] = city_info
+	}
+}
+
 //初步确认~站点
 //func (e *ExtractTask) GetRegionByTentativeSite(j *ju.Job, all_regions *map[string]map[string]map[string]string) {
 //	area, city, district := "", "", ""

+ 24 - 22
src/jy/extract/extractcity_way.go

@@ -20,7 +20,9 @@ var CleanDetailReg1 = regexp.MustCompile("(北京时间)")
 var XjbtReg *regexp.Regexp = regexp.MustCompile("^(新疆生产建设兵团|新疆兵团)")
 var SensitiveReg = regexp.MustCompile("(上一[条篇]|下一[条篇])[::].*")
 
-//取特殊类数据
+var OperatorReg = regexp.MustCompile("^中国(电信|联通|移动).*公司$") // OperatorReg matches names that start with 中国 followed by 电信, 联通 or 移动 (Telecom / Unicom / Mobile) and end with 公司.
+
+// 取特殊类数据
 func GetFilialeByBuyerInfo(buyer string) string {
 	if FilialeReg1.MatchString(buyer) {
 		return FilialeReg1.FindString(buyer)
@@ -35,7 +37,7 @@ func GetFilialeByBuyerInfo(buyer string) string {
 	return ""
 }
 
-//最终确认~指定地域
+// 最终确认~指定地域
 func ConfirmUniqueRegionInfo(regions map[string]map[string]map[string]string, area *string, city *string, district *string) bool {
 	if len(regions) > 1 || len(regions) == 0 {
 		return false
@@ -59,7 +61,7 @@ func ConfirmUniqueRegionInfo(regions map[string]map[string]map[string]string, ar
 	return false
 }
 
-//完整信息
+// 完整信息
 func CompleteRegionInfo(area *string, city *string, district *string) {
 	if *area == "北京" {
 		*city = "北京市"
@@ -80,7 +82,7 @@ func CompleteRegionInfo(area *string, city *string, district *string) {
 	}
 }
 
-//根据词获取所有的地域 ~ 暂时不采用三级简称提取城市
+// 根据词获取所有的地域 ~ 暂时不采用三级简称提取城市
 func (e *ExtractTask) takeRegionsFromWords(text string, isAddress bool, isBrief bool, regionsArr *[]map[string]string) {
 	//全称匹配
 	for pos_full, trie_full := range e.Trie_Fulls {
@@ -147,7 +149,7 @@ func (e *ExtractTask) takeRegionsFromWords(text string, isAddress bool, isBrief
 	}
 }
 
-//是否拼接数据~且根据地址类~首地域判断
+// 是否拼接数据~且根据地址类~首地域判断
 func SplicingRegionsInfo(isAddress bool, regionsArr *[]map[string]string, infoArr []map[string]string) {
 	if isAddress {
 		if len(*regionsArr) == 0 { //第一次
@@ -186,7 +188,7 @@ func SplicingRegionsInfo(isAddress bool, regionsArr *[]map[string]string, infoAr
 	}
 }
 
-//文本取地域   from  1~jsondata文本   2~其他文本
+// 文本取地域   from  1~jsondata文本   2~其他文本
 func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[string]map[string]string, isAddress bool, isBrief bool, from int) []map[string]interface{} {
 	regionValues := []map[string]interface{}{}
 	if text == "" {
@@ -214,7 +216,7 @@ func (e *ExtractTask) GetRegionFromText(text string, regions *map[string]map[str
 	return regionValues
 }
 
-//更新方法
+// 更新方法
 func UpdateRegionsInfo(area, city, district string, regions *map[string]map[string]map[string]string) {
 	if (*regions)[area] == nil {
 		city_info := map[string]map[string]string{}
@@ -242,7 +244,7 @@ func UpdateRegionsInfo(area, city, district string, regions *map[string]map[stri
 	}
 }
 
-//同组合并后合理性校验
+// 同组合并后合理性校验
 func ReasonableGroupRegionInfo(datas map[string]map[string]map[string]string) map[string]map[string]map[string]string {
 	if len(datas) > 2 || len(datas) == 0 { //省份超限,无效
 		return map[string]map[string]map[string]string{}
@@ -307,7 +309,7 @@ func ReasonableGroupRegionInfo(datas map[string]map[string]map[string]string) ma
 	return datas
 }
 
-//两组比对~找寻补充,排除数据
+// 两组比对~找寻补充,排除数据
 func AnalysisIsUniqueInfo(regions map[string]map[string]map[string]string, all_regions *map[string]map[string]map[string]string) {
 	if len(regions) == 0 {
 		return
@@ -345,7 +347,7 @@ func AnalysisIsUniqueInfo(regions map[string]map[string]map[string]string, all_r
 	}
 }
 
-//选取规则方法
+// 选取规则方法
 func ScreenOutReasonableRegionInfo(info map[string]string, regions_infosArr *[]map[string]string, new_regions_infosArr *[]map[string]string) map[string]string {
 	area := qu.ObjToString(info["area"])
 	city := qu.ObjToString(info["city"])
@@ -389,7 +391,7 @@ func ScreenOutReasonableRegionInfo(info map[string]string, regions_infosArr *[]m
 	return nil
 }
 
-//拆分地域数据~目的更好的合并选取
+// 拆分地域数据~目的更好的合并选取
 func splitRegionsInfos(infos map[string]map[string]map[string]string) []map[string]string {
 	infosArr := []map[string]string{}
 	for k, v := range infos {
@@ -410,7 +412,7 @@ func splitRegionsInfos(infos map[string]map[string]map[string]string) []map[stri
 	return infosArr
 }
 
-//日志流程记录~组级别
+// 日志流程记录~组级别
 func LogProcessRecordingForGroupInfo(key string, valueArr []string, fieldInfos map[string]interface{}, groupInfos map[string]map[string]map[string]string, finallyInfos map[string]map[string]map[string]string, logRecordInfo *[]map[string]interface{}) {
 	groupArr := splitRegionsInfos(groupInfos)
 	finalluArr := splitRegionsInfos(finallyInfos)
@@ -425,12 +427,12 @@ func LogProcessRecordingForGroupInfo(key string, valueArr []string, fieldInfos m
 	*logRecordInfo = append(*logRecordInfo, data)
 }
 
-//日志流程记录~第二链路
+// 日志流程记录~第二链路
 func LogProcessRecordingForSupplement(logRecordInfo *[]map[string]interface{}, data map[string]interface{}) {
 	*logRecordInfo = append(*logRecordInfo, data)
 }
 
-//日志流程记录~初步
+// 日志流程记录~初步
 func LogProcessRecordingForTentative(key string, valueArr interface{}, finallyInfos map[string]map[string]map[string]string, logRecordInfo *[]map[string]interface{}) {
 	finallyArr := splitRegionsInfos(finallyInfos)
 	data := map[string]interface{}{
@@ -440,7 +442,7 @@ func LogProcessRecordingForTentative(key string, valueArr interface{}, finallyIn
 	*logRecordInfo = append(*logRecordInfo, data)
 }
 
-//重构地域逻辑顺序
+// 重构地域逻辑顺序
 func (e *ExtractTask) IsConsecutionRegion(site string) bool {
 	isReset := false
 	if tmp := e.SiteCityMap[site]; tmp != nil {
@@ -452,7 +454,7 @@ func (e *ExtractTask) IsConsecutionRegion(site string) bool {
 	return isReset
 }
 
-//清洗项目名称
+// 清洗项目名称
 func CleanRegionProjectNameInfo(projectname string, buyer string) string {
 	new_str := projectname
 	if new_str == "" {
@@ -467,7 +469,7 @@ func CleanRegionProjectNameInfo(projectname string, buyer string) string {
 	return new_str
 }
 
-//清洗文本词组
+// 清洗文本词组
 func CleanRegionTextWords(wordsArr []string) []string {
 	if len(wordsArr) <= 1 {
 		return wordsArr
@@ -499,7 +501,7 @@ func CleanRegionTextWords(wordsArr []string) []string {
 	return newArr
 }
 
-//链路补充~全称类
+// 链路补充~全称类
 func (e *ExtractTask) LinkSpecialRuleFullStep(text string, area *string, city *string, district *string) {
 	regions := map[string]map[string]map[string]string{}
 	wordsArr := e.Seg_SV.Cut(text, true)
@@ -525,7 +527,7 @@ func (e *ExtractTask) LinkSpecialRuleFullStep(text string, area *string, city *s
 	}
 }
 
-//链路补充~简称类
+// 链路补充~简称类
 func (e *ExtractTask) LinkSpecialRuleBriefStep(text string, area *string, city *string, district *string) {
 	regions := map[string]map[string]map[string]string{}
 	wordsArr := e.Seg_SV.Cut(text, true)
@@ -558,7 +560,7 @@ func (e *ExtractTask) LinkSpecialRuleBriefStep(text string, area *string, city *
 	}
 }
 
-//新增原则
+// 新增原则
 func LinkAddedRules(regions map[string]map[string]map[string]string, area *string, city *string, district *string) {
 	if len(regions) == 1 {
 		for k, v := range regions {
@@ -577,7 +579,7 @@ func LinkAddedRules(regions map[string]map[string]map[string]string, area *strin
 	}
 }
 
-//补充原则
+// 补充原则
 func LinkSuppleRules(regions map[string]map[string]map[string]string, area *string, city *string, district *string) {
 	for k, v := range regions {
 		if *area == k {
@@ -595,7 +597,7 @@ func LinkSuppleRules(regions map[string]map[string]map[string]string, area *stri
 	}
 }
 
-//链路补充~企业校验步骤
+// 链路补充~企业校验步骤
 func LinkSpecialQyxyStep(buyer string, area *string, city *string, district *string) {
 	qyxy_arr, _ := ju.Qyxy_Mgo.Find("qyxy_std", map[string]interface{}{
 		"company_name": buyer,

+ 0 - 2
src/jy/extract/extractinit.go

@@ -1365,7 +1365,6 @@ func (e *ExtractTask) InitPostCode() {
 		pc.P = qu.ObjToString(l["province"])
 		pc.C = qu.ObjToString(l["city"])
 		pc.D = ju.ConvertInterface(l["district"])
-		//pc.D = qu.ObjArrToStringArr(l["district"].([]interface{}))
 		e.PostCodeMap[pc.Code] = pc
 	}
 }
@@ -1380,7 +1379,6 @@ func (e *ExtractTask) InitAreaCode() {
 		ac.Code = qu.ObjToString(l["code"])
 		ac.P = qu.ObjToString(l["province"])
 		ac.C = ju.ConvertInterface(l["city"])
-		//ac.C = qu.ObjArrToStringArr(l["city"].([]interface{}))
 		e.AreaCodeMap[ac.Code] = ac
 	}
 }

+ 26 - 2
src/jy/extract/extractudp.go

@@ -127,8 +127,7 @@ var ext *ExtractTask
 // 根据id区间抽取-udp模式
 func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 	defer qu.Catch()
-	if ext == nil || ju.IsUpdateRule {
-		ju.IsUpdateRule = false
+	if ext == nil {
 		ext = nil
 		ext = &ExtractTask{}
 		ext.Id = qu.ObjToString(ju.Config["udptaskid"])
@@ -167,6 +166,31 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 		ext.IsRun = true
 		ext.BidTotal = 0
 	} else {
+		if ju.IsUpdateRule {
+			ju.IsUpdateRule = false
+			log.Debug("每天更新一次规则......")
+			//规则重置
+			ext.InitSite()
+			ext.InitRulePres()
+			ext.InitRuleBacks(false)
+			ext.InitRuleBacks(true)
+			ext.InitRuleCore(false)
+			ext.InitRuleCore(true)
+			ext.InitBlockRule()
+			ext.InitPkgCore()
+			ext.InitTag(false)
+			ext.InitTag(true)
+			ext.InitClearFn(false)
+			ext.InitClearFn(true)
+			//地域重置
+			ext.Lock()
+			if ext.IsExtractCity { //版本上控制是否开始城市抽取
+				ext.InitCityInfo()
+				ext.InitAreaCode()
+				ext.InitPostCode()
+			}
+			ext.Unlock()
+		}
 		ext.BidTotal = 0
 	}
 	index := 0

+ 9 - 9
src/jy/util/util.go

@@ -13,12 +13,12 @@ import (
 	. "gopkg.in/mgo.v2/bson"
 )
 
-//敏感词
+// 敏感词
 type DFA struct {
 	Link map[string]interface{}
 }
 
-//定义字典树
+// 定义字典树
 type Trie struct {
 	y bool
 	c map[rune]*Trie
@@ -96,7 +96,7 @@ func UtilInit() {
 	IsUpdateRule = false
 	c := cron.New()
 	c.AddFunc("0 0 8 * * ?", func() {
-		//IsUpdateRule = true
+		IsUpdateRule = true
 	})
 	c.Start()
 
@@ -114,7 +114,7 @@ func GetSyncIndex(code string) string {
 	return tmp
 }
 
-//nfields非复制字段集
+// nfields非复制字段集
 func DeepCopy(value interface{}) interface{} {
 	if valueMap, ok := value.(map[string]interface{}); ok {
 		newMap := make(map[string]interface{})
@@ -223,13 +223,13 @@ func (t *Trie) Get(word string) bool {
 	return cur.y
 }
 
-//初始化商品
+// 初始化商品
 func InitGoods() {
 	GoodsGet = &DFA{}
 	GoodsGet.AddWord(GoodsConfig...)
 }
 
-//初始化品牌
+// 初始化品牌
 func InitBrand() {
 	BrandGet = &DFA{}
 	BrandGet.AddWord(BrandConfig...)
@@ -276,7 +276,7 @@ func IsMarkInterfaceMap(t interface{}) []map[string]interface{} {
 	return p_list
 }
 
-//GetQualifications 从正文中获取气质要求
+// GetQualifications 从正文中获取资质要求
 func GetQualifications(text string) (qualifications string) {
 	re1 := regexp.MustCompile(`(\n(\s)*(\d)[、..](\s*)[\p{Han}]+[::]?)`)
 	re2 := regexp.MustCompile(`(?m)^(★)?(一|二|三|四|五|六|七|八|九|十|十一)[、.](.+?)[\p{Han}]?[::]?$`)
@@ -332,7 +332,7 @@ func ExtractSections(text string, re *regexp.Regexp) []map[string]interface{} {
 	return sections
 }
 
-//ContainSpecialWord 判断一个字符串是否包含特殊关键词
+// ContainSpecialWord 判断一个字符串是否包含特殊关键词
 func ContainSpecialWord(key string, words []string) bool {
 	//含有排除关键词,直接跳过
 	for _, word := range words {
@@ -344,7 +344,7 @@ func ContainSpecialWord(key string, words []string) bool {
 	return false
 }
 
-//RemoveDuplicates 移除重复字符串
+// RemoveDuplicates 移除重复字符串
 func RemoveDuplicates(input []string) []string {
 	seen := make(map[string]bool)
 	output := []string{}