浏览代码

数据lock-城市抽取

unknown 6 年之前
父节点
当前提交
50f22e8b9a

+ 1 - 2
src/config.json

@@ -2,11 +2,10 @@
     "port": "9090",
     "mgodb": "192.168.3.207:27082",
     "dbsize": 5,
-    "dbname": "extract_v3",
+    "dbname": "extract_kf",
     "redis": "buyer=192.168.3.18:6379,winner=192.168.3.18:6379",
     "saveresult": true,
     "fieldscore": true,
-    "buyerwinner": false,
     "qualityaudit": true,
     "udptaskid": "5be107e600746bf92debf080",
     "udpip": "127.0.0.1",

+ 1 - 1
src/jy/admin/audit/rulemanager.go

@@ -21,7 +21,7 @@ func init() {
 	Admin.GET("/audit/recogfield", func(c *gin.Context) {
 		c.HTML(200, "audit_recogfield.html", nil)
 	})
-	Admin.POST("/rulemanager/getrecogfield", GetRecogField)   //新增识别字段
+	Admin.POST("/rulemanager/getrecogfield", GetRecogField)   //获取识别字段
 	Admin.POST("/rulemanager/saverecogfield", SaveRecogField) //保存
 	Admin.POST("/rulemanager/delrecogfield", DelRecogField)   //删除
 	//class

+ 50 - 38
src/jy/admin/version.go

@@ -26,38 +26,44 @@ func init() {
 		c.JSON(200, gin.H{"data": data})
 	})
 	Admin.POST("/version/save", func(c *gin.Context) {
-		version, _ := c.GetPostForm("version")
-		tmp, _ := Mgo.FindOne("version", `{"version":"`+version+`"}`)
-		if len(*tmp) > 0 {
-			c.JSON(200, gin.H{"rep": false})
+		_id, _ := c.GetPostForm("_id")
+		data := GetPostForm(c)
+		if _id != "" {
+			Mgo.UpdateById("version", _id, map[string]interface{}{"$set": data})
+			c.JSON(200, gin.H{"rep": true})
 		} else {
-			s_pversionid, _ := c.GetPostForm("s_pversionid")
-			data := GetPostForm(c)
-			iscopyfiled, _ := data["iscopyfiled"].(bool)
-			data["l_createtime"] = time.Now().Unix()
-			s_username := sessions.Default(c).Get("username").(string)
-			data["s_username"] = s_username
-			data["delete"] = false
-			vid := Mgo.Save("version", data)
-			if s_pversionid != "" {
-				copyComRules(version, s_pversionid, s_username)
-			}
-			if iscopyfiled {
-				list, _ := Mgo.Find("versioninfo", `{"vid":`+s_pversionid+`,"delete":false}`, nil, nil, false, -1, -1)
-				log.Println(s_pversionid, len(*list))
-				for _, v := range *list {
-					delete(v, "_id")
-					v["l_createtime"] = time.Now().Unix()
-					v["s_username"] = sessions.Default(c).Get("username")
-					v["l_lasttime"] = time.Now().Unix()
-					v["vid"] = vid
-					v["delete"] = false
-					pid := Mgo.Save("versioninfo", v)
-					s_field := qu.ObjToString(v["s_field"])
-					copyFieldRules(vid, pid, s_field, s_pversionid, s_username)
+			version, _ := c.GetPostForm("version")
+			tmp, _ := Mgo.FindOne("version", `{"version":"`+version+`","delete":false}`)
+			if len(*tmp) > 0 {
+				c.JSON(200, gin.H{"rep": false})
+			} else {
+				s_pversionid, _ := c.GetPostForm("s_pversionid")
+				iscopyfiled, _ := data["iscopyfiled"].(bool)
+				data["l_createtime"] = time.Now().Unix()
+				s_username := sessions.Default(c).Get("username").(string)
+				data["s_username"] = s_username
+				data["delete"] = false
+				vid := Mgo.Save("version", data)
+				if s_pversionid != "" {
+					copyComRules(version, s_pversionid, s_username)
 				}
+				if iscopyfiled {
+					list, _ := Mgo.Find("versioninfo", `{"vid":`+s_pversionid+`,"delete":false}`, nil, nil, false, -1, -1)
+					log.Println(s_pversionid, len(*list))
+					for _, v := range *list {
+						delete(v, "_id")
+						v["l_createtime"] = time.Now().Unix()
+						v["s_username"] = sessions.Default(c).Get("username")
+						v["l_lasttime"] = time.Now().Unix()
+						v["vid"] = vid
+						v["delete"] = false
+						pid := Mgo.Save("versioninfo", v)
+						s_field := qu.ObjToString(v["s_field"])
+						copyFieldRules(vid, pid, s_field, s_pversionid, s_username)
+					}
+				}
+				c.JSON(200, gin.H{"rep": true})
 			}
-			c.JSON(200, gin.H{"rep": true})
 		}
 	})
 	Admin.POST("/version/use", func(c *gin.Context) {
@@ -106,20 +112,26 @@ func init() {
 		data := GetPostForm(c)
 		if _id != "" {
 			Mgo.UpdateById("versioninfo", _id, map[string]interface{}{"$set": data})
+			c.JSON(200, gin.H{"rep": true})
 		} else {
 			s_field, _ := c.GetPostForm("s_field")
 			vid, _ := c.GetPostForm("vid")
-			data["l_createtime"] = time.Now().Unix()
-			data["s_username"] = sessions.Default(c).Get("username")
-			data["l_lasttime"] = time.Now().Unix()
-			data["delete"] = false
-			pid := Mgo.Save("versioninfo", data)
-			fromvid, _ := data["s_pversionid"].(string)
-			if fromvid != "" {
-				copyFieldRules(vid, pid, s_field, fromvid, sessions.Default(c).Get("username").(string))
+			tmp, _ := Mgo.FindOne("versioninfo", `{"s_field":"`+s_field+`","vid":"`+vid+`","delete":false}`)
+			if len(*tmp) > 0 {
+				c.JSON(200, gin.H{"rep": false})
+			} else {
+				data["l_createtime"] = time.Now().Unix()
+				data["s_username"] = sessions.Default(c).Get("username")
+				data["l_lasttime"] = time.Now().Unix()
+				data["delete"] = false
+				pid := Mgo.Save("versioninfo", data)
+				fromvid, _ := data["s_pversionid"].(string)
+				if fromvid != "" {
+					copyFieldRules(vid, pid, s_field, fromvid, sessions.Default(c).Get("username").(string))
+				}
+				c.JSON(200, gin.H{"rep": true})
 			}
 		}
-		c.JSON(200, gin.H{"rep": true})
 	})
 	Admin.POST("/version/infouse", func(c *gin.Context) {
 		_id, _ := c.GetPostForm("_id")

+ 52 - 49
src/jy/extract/extract.go

@@ -19,14 +19,13 @@ import (
 )
 
 var (
-	lock        sync.RWMutex
-	cut         = ju.NewCut()                          //获取正文并清理
-	ExtLogs     map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList    map[string]*ExtractTask                //任务列表
-	saveLimit   = 200                                  //抽取日志批量保存
-	PageSize    = 5000                                 //查询分页
-	Fields      = `{"title":1,"detail":1,"contenthtml":1,"href":1,"site":1,"spidercode":1,"toptype":1,"area":1,"city":1}`
-	AuditFields = []string{} //需要审核的字段名称
+	lock      sync.RWMutex
+	cut       = ju.NewCut()                          //获取正文并清理
+	ExtLogs   map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList  map[string]*ExtractTask                //任务列表
+	saveLimit = 200                                  //抽取日志批量保存
+	PageSize  = 5000                                 //查询分页
+	Fields    = `{"title":1,"detail":1,"contenthtml":1,"href":1,"site":1,"spidercode":1,"toptype":1,"area":1,"city":1}`
 )
 
 //启动测试抽取
@@ -42,15 +41,14 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 	ext.InitRuleCore()
 	ext.InitTag()
 	ext.InitClearFn()
-	//城市
-	ext.InitProvince()
-	ext.InitCityAll()
-	ext.InitCitySim()
-	InitDFA()
+	if ext.IsExtractCity { //版本上控制是否开始城市抽取
+		//初始化城市DFA信息
+		ext.InitDFA()
+	}
 	//质量审核
-	InitAuditRule()
-	InitAuditClass()
-	InitAuditRecogField()
+	ext.InitAuditRule()
+	ext.InitAuditClass()
+	ext.InitAuditRecogField()
 	return RunExtractTestTask(ext, startId, num)
 }
 
@@ -96,15 +94,14 @@ func StartExtractTaskId(taskId string) bool {
 	ext.InitRuleCore()
 	ext.InitTag()
 	ext.InitClearFn()
-	//城市
-	ext.InitProvince()
-	ext.InitCityAll()
-	ext.InitCitySim()
-	InitDFA()
+	if ext.IsExtractCity { //版本上控制是否开始城市抽取
+		//初始化城市DFA信息
+		ext.InitDFA()
+	}
 	//质量审核
-	InitAuditRule()
-	InitAuditClass()
-	InitAuditRecogField()
+	ext.InitAuditRule()
+	ext.InitAuditClass()
+	ext.InitAuditRecogField()
 
 	ext.IsRun = true
 	go ext.ResultSave()
@@ -781,15 +778,15 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
 
 	//质量审核
 	if ju.Config["qualityaudit"].(bool) {
-		QualityAudit(resulttmp)
-
+		e.QualityAudit(resulttmp)
 	}
-	b, p, c, d := TransmitData(resulttmp, _id) //抽取省份城市
-	//log.Println("抽取省份,城市,县结果=====", b, p, c, d)
-	resulttmp["district"] = d
-	if b {
-		resulttmp["city"] = c
-		resulttmp["area"] = p
+	if e.IsExtractCity { //城市抽取
+		b, p, c, d := e.TransmitData(resulttmp, _id) //抽取省份城市
+		resulttmp["district"] = d
+		if b {
+			resulttmp["city"] = c
+			resulttmp["area"] = p
+		}
 	}
 
 	if e.TaskInfo.TestColl == "" {
@@ -824,10 +821,10 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
 	}
 }
 
-func QualityAudit(resulttmp map[string]interface{}) {
+func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 	//获取审核字段
 	//log.Println("需要审核的字段-----", AuditFields)
-	if len(AuditFields) == 0 {
+	if len(e.AuditFields) == 0 {
 		v, _ := db.Mgo.FindOne("version", `{"isuse":true,"delete":false}`) //查找当前使用版本
 		if len(*v) > 0 {                                                   //查找当前使用版本中属性配置需要审核的字段
 			vid := qu.BsonIdToSId((*v)["_id"])
@@ -839,11 +836,11 @@ func QualityAudit(resulttmp map[string]interface{}) {
 			data, _ := db.Mgo.Find("versioninfo", query, `{"_id":-1}`, `{"s_field":1}`, false, -1, -1)
 			for _, d := range *data {
 				field := qu.ObjToString(d["s_field"])
-				AuditFields = append(AuditFields, field)
+				e.AuditFields = append(e.AuditFields, field)
 			}
 		}
 	}
-	for _, field := range AuditFields {
+	for _, field := range e.AuditFields {
 		//1.分包
 		if resulttmp["package"] != nil {
 			packagedata := resulttmp["package"].(map[string]map[string]interface{})
@@ -852,11 +849,11 @@ func QualityAudit(resulttmp map[string]interface{}) {
 					fv := qu.ObjToString(val[field])
 					if fv != "" {
 						if field == "buyer" || field == "winner" { //field为buyer和winner时特殊处理,先从Redis中查,有直接通过,没有走匹配规则
-							RedisMatch(field, fv, val) //redis匹配
+							e.RedisMatch(field, fv, val) //redis匹配
 						} else { //除了buyer和winner,其他字段走规则匹配
 							fv := qu.ObjToString(resulttmp[field])
 							//resulttmp[field+"_isredis"] = false
-							RuleMatch(field, fv, resulttmp)
+							e.RuleMatch(field, fv, resulttmp)
 						}
 					}
 				}
@@ -867,11 +864,11 @@ func QualityAudit(resulttmp map[string]interface{}) {
 			fv := qu.ObjToString(resulttmp[field])
 			if fv != "" {
 				if field == "buyer" || field == "winner" { //field为buyer和winner时特殊处理,先从Redis中查,有直接通过,没有走匹配规则
-					RedisMatch(field, fv, resulttmp) //redis匹配
+					e.RedisMatch(field, fv, resulttmp) //redis匹配
 				} else { //除了buyer和winner,其他字段走规则匹配
 					fv := qu.ObjToString(resulttmp[field])
 					//resulttmp[field+"_isredis"] = false
-					RuleMatch(field, fv, resulttmp)
+					e.RuleMatch(field, fv, resulttmp)
 				}
 			}
 		}
@@ -879,38 +876,42 @@ func QualityAudit(resulttmp map[string]interface{}) {
 }
 
 //Redis匹配
-func RedisMatch(field, fv string, val map[string]interface{}) {
+func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
 	if i == 0 {                            //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
-		RuleMatch(field, fv, val) //规则匹配
+		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库
 		val[field+"_isredis"] = true
 	}
 }
 
 //规则匹配
-func RuleMatch(field, fieldval string, tmpMap map[string]interface{}) {
+func (e *ExtractTask) RuleMatch(field, fieldval string, tmpMap map[string]interface{}) {
 	if fieldval != "" {
-		SMap := StartMatch(field, fieldval)
+		SMap := e.StartMatch(field, fieldval)
 		//SMap.AddKey(field+"_isaudit", false)
 		for _, k := range SMap.Keys {
 			tmpMap[k] = SMap.Map[k]
 		}
-		tmpMap[field+"_isaudit"] = false
+		tmpMap[field+"_isaudit"] = false //添加字段未审核信息
 	}
 }
 
 //开始规则匹配
-func StartMatch(field, text string) *pretreated.SortMap {
+func (e *ExtractTask) StartMatch(field, text string) *pretreated.SortMap {
 	SMap := pretreated.NewSortMap()
-	f := RecogFieldMap[field]
+	lock.Lock()
+	f := e.RecogFieldMap[field]
+	lock.Unlock()
 	if len(f) > 0 {
 		fid := qu.BsonIdToSId(f["_id"])
 		recogFieldPreRule := qu.ObjToString(f["s_recogfield_prerule"])
 		textAfterRecogFieldPrerule := ju.PreFilter(text, recogFieldPreRule) //识别字段的前置过滤
 		if textAfterRecogFieldPrerule != "" {
-			classMap := FidClassMap[fid]
+			lock.Lock()
+			classMap := e.FidClassMap[fid]
+			lock.Unlock()
 		L:
 			for _, c := range classMap { //class
 				classid := qu.BsonIdToSId(c["_id"])
@@ -918,7 +919,9 @@ func StartMatch(field, text string) *pretreated.SortMap {
 				savefield := qu.ObjToString(c["s_savefield"])                                   //保存字段
 				textAfterClassPrerule := ju.PreFilter(textAfterRecogFieldPrerule, classPrerule) //class的前置过滤
 				if textAfterClassPrerule != "" {
-					ruleMap := CidRuleMap[classid]
+					lock.Lock()
+					ruleMap := e.CidRuleMap[classid]
+					lock.Unlock()
 					for _, r := range ruleMap { //rule
 						rulePrerule := qu.ObjToString(r["s_rule_prerule"])
 						s_code := qu.ObjToString(r["s_code"])

+ 119 - 87
src/jy/extract/extractInit.go

@@ -50,25 +50,39 @@ type Tag struct {
 }
 
 type ExtractTask struct {
-	Id        string              //任务id
-	IsRun     bool                //是否启动
-	Content   string              //信息内容
-	TaskInfo  *TaskInfo           //任务信息
-	RulePres  []*RegLuaInfo       //通用前置规则
-	RuleBacks []*RegLuaInfo       //通用后置规则
-	RuleCores []*RuleCore         //抽取规则
-	Tag       map[string][]*Tag   //标签库
-	ClearFn   map[string][]string //清理函数
+	Id            string              //任务id
+	IsRun         bool                //是否启动
+	Content       string              //信息内容
+	TaskInfo      *TaskInfo           //任务信息
+	RulePres      []*RegLuaInfo       //通用前置规则
+	RuleBacks     []*RegLuaInfo       //通用后置规则
+	RuleCores     []*RuleCore         //抽取规则
+	Tag           map[string][]*Tag   //标签库
+	ClearFn       map[string][]string //清理函数
+	IsExtractCity bool                //是否开启城市抽取
 
 	ResultChanel chan bool                  //抽取结果详情
 	ResultArr    [][]map[string]interface{} //抽取结果详情
 	BidChanel    chan bool                  //抽取结果
 	BidArr       [][]map[string]interface{} //抽取结果
-}
 
-var RecogFieldMap map[string]map[string]interface{}
-var FidClassMap map[string][]map[string]interface{}
-var CidRuleMap map[string][]map[string]interface{}
+	RecogFieldMap map[string]map[string]interface{}   //识别字段
+	FidClassMap   map[string][]map[string]interface{} //分类
+	CidRuleMap    map[string][]map[string]interface{} //规则
+	AuditFields   []string                            //需要审核的字段名称
+
+	ProvinceMap       map[string]string
+	CityBrief         map[string]*City     //只加载一次即可
+	ProvinceBrief     map[string]*Province //只加载一次
+	AreaToCity        map[string][]*City   //两个文件共用
+	DistrictCityMap   map[string]*City
+	StreetDistrictMap map[string]*District
+	AreaGet           DFA //市全称
+	AreaDistrict      DFA //区或县
+	AreaProvinceGet   DFA //省
+	AreaSimGet        DFA //市简称
+	AreaStreet        DFA //街道
+}
 
 func init() {
 	TaskList = make(map[string]*ExtractTask)
@@ -92,6 +106,7 @@ func (e *ExtractTask) InitTestTaskInfo(resultcoll, trackcoll string) {
 			IsEtxLog:    true,
 			ProcessPool: make(chan bool, 1),
 		}
+		e.IsExtractCity = (*v)["isextractcity"].(bool)
 	} else {
 		return
 	}
@@ -116,6 +131,7 @@ func (e *ExtractTask) InitTaskInfo() {
 			LastExtId:   qu.ObjToString((*task)["s_extlastid"]),
 			ProcessPool: make(chan bool, qu.IntAllDef((*task)["i_process"], 1)),
 		}
+		e.IsExtractCity = (*v)["isextractcity"].(bool)
 		log.Println(e.TaskInfo.Name, "thread:", qu.IntAllDef((*task)["i_process"], 1))
 	} else {
 		return
@@ -329,7 +345,6 @@ func (e *ExtractTask) InitTag() {
 			for k, key := range tmp {
 				tag := &Tag{Type: "string", Key: key.(string)}
 				e.Tag[field] = append(e.Tag[field], tag)
-
 				tab.Items[k] = &ju.Tag{key.(string), 0 - k, nil}
 			}
 			sort.Sort(tab.Items)
@@ -347,7 +362,6 @@ func (e *ExtractTask) InitTag() {
 			for k, key := range tmp {
 				tag := &Tag{Type: "regexp", Key: key.(string), Reg: regexp.MustCompile(key.(string))}
 				e.Tag[field] = append(e.Tag[field], tag)
-
 				tab.Items[k] = &ju.Tag{key.(string), 0 - k, regexp.MustCompile(key.(string))}
 			}
 			sort.Sort(tab.Items)
@@ -386,10 +400,10 @@ func (e *ExtractTask) InitClearFn() {
 }
 
 //加载省份
-func (e *ExtractTask) InitProvince() {
+func InitProvince(version string) map[string]interface{} {
 	defer qu.Catch()
 	fn := map[string]interface{}{}
-	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"province","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
+	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"province","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1)
 	for _, v := range *list {
 		name := qu.ObjToString(v["s_name"])
 		content := v["content"]
@@ -400,124 +414,143 @@ func (e *ExtractTask) InitProvince() {
 			fn[name] = content
 		}
 	}
-	ProviceConfig = fn
+	return fn
 }
 
 //加载城市简称
-func (e *ExtractTask) InitCitySim() {
+func InitCitySim(version string) map[string]map[string]interface{} {
 	defer qu.Catch()
-	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"citysim","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
+	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"citysim","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1)
 	fn := map[string]map[string]interface{}{}
 	for _, v := range *list {
 		name := qu.ObjToString(v["s_name"])
 		tmp := v["content"].(map[string]interface{})
 		fn[name] = tmp
 	}
-	CitySimConfig = fn
+	return fn
 }
 
 //加载城市全称
-func (e *ExtractTask) InitCityAll() {
+func InitCityAll(version string) map[string]map[string]interface{} {
 	defer qu.Catch()
-	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"cityall","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
-	//	if len(*list) != 34 {
-	//		fmt.Println("加载城市配置文件出错", len(*list))
-	//	}
+	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"cityall","s_version":"`+version+`","delete":false}`, nil, nil, false, -1, -1)
 	fn := map[string]map[string]interface{}{}
 	for _, v := range *list {
 		name := qu.ObjToString(v["s_name"])
 		tmp := v["content"].(map[string]interface{})
 		fn[name] = tmp
 	}
-	CityAllConfig = fn
+	return fn
 }
 
 //初始化城市省份敏感词
-func InitDFA() {
+func (e *ExtractTask) InitDFA() {
 	defer qu.Catch()
-	AreaGet = DFA{}
-	AreaProvinceGet = DFA{}
-	AreaStreet = DFA{}
-	for k, v := range ProviceConfig {
+	e.AreaGet = DFA{}
+	e.AreaProvinceGet = DFA{}
+	e.AreaStreet = DFA{}
+	//初始化map
+	if e.ProvinceMap == nil {
+		e.ProvinceMap = make(map[string]string)
+	}
+	if e.CityBrief == nil {
+		e.CityBrief = make(map[string]*City)
+	}
+	if e.ProvinceBrief == nil {
+		e.ProvinceBrief = make(map[string]*Province)
+	}
+	if e.AreaToCity == nil {
+		e.AreaToCity = make(map[string][]*City)
+	}
+	if e.DistrictCityMap == nil {
+		e.DistrictCityMap = make(map[string]*City)
+	}
+	if e.StreetDistrictMap == nil {
+		e.StreetDistrictMap = make(map[string]*District)
+	}
+	//初始化省
+	fn1 := InitProvince(e.TaskInfo.Version)
+	for k, v := range fn1 {
 		for _, p := range v.([]interface{}) {
 			p1, _ := p.(string)
-			AreaProvinceGet.AddWord(p1)
-			ProvinceMap[p1] = k
+			e.AreaProvinceGet.AddWord(p1)
+			e.ProvinceMap[p1] = k
 		}
 	}
-	//	ProvinceMap["新疆省"] = "新疆"
-	//	ProvinceMap["新疆兵团"] = "新疆"
-	//	provinceMap["广西省"] = "广西"
-	for k, v := range CityAllConfig {
-		AreaProvinceGet.AddWord(k) //省全称
+
+	//初始化城市全称
+	fn2 := InitCityAll(e.TaskInfo.Version)
+	for k, v := range fn2 {
+		e.AreaProvinceGet.AddWord(k) //省全称
 		p := &Province{}
 		p.Name = k
 		p.Brief = v["brief"].(string)
-		ProvinceMap[k] = p.Brief
-		ProvinceBrief[p.Brief] = p
+		e.ProvinceMap[k] = p.Brief
+		//
+		e.ProvinceBrief[p.Brief] = p
 		p.Cap = v["captial"].(string)
 		city, _ := v["city"].(map[string]interface{})
 		for k1, v1 := range city {
 			v1m, _ := v1.(map[string]interface{})
 			c := &City{}
 			c.Name = k1
-			if v1m["brief"] == nil {
-			}
+			//			if v1m["brief"] == nil {
+			//			}
 			c.Brief = v1m["brief"].(string)
-			CityBrief[c.Brief] = c
+			//
+			e.CityBrief[c.Brief] = c
 			c.P = p
 			if c.Brief == p.Cap {
 				p.Captial = c
 			}
 			//加入到城市map中
-			cs := AreaToCity[k1]
-			AreaGet.AddWord(k1) //市全称
+			//
+			cs := e.AreaToCity[k1]
+			e.AreaGet.AddWord(k1) //市全称
 			if cs != nil {
 				cs = append(cs, c)
 			} else {
 				cs = []*City{c}
 			}
-			AreaToCity[k1] = cs
-
+			e.AreaToCity[k1] = cs
 			//区县
 			districtmap := v1m["area"].(map[string]interface{}) //区或县
 			for district, streetarr := range districtmap {
 				d := &District{}
 				d.Name = district
 				d.C = c
-				AreaDistrict.AddWord(district) //加入区或县敏感词
-				ctmp := DistrictCityMap[district]
+				e.AreaDistrict.AddWord(district) //加入区或县敏感词
+				ctmp := e.DistrictCityMap[district]
 				if ctmp == nil {
-					DistrictCityMap[district] = c
+					e.DistrictCityMap[district] = c
 				}
-
 				//街道
 				for _, s := range qu.ObjArrToStringArr(streetarr.([]interface{})) {
-					AreaStreet.AddWord(s) //加入街道敏感词
-					dtmp := StreetDistrictMap[s]
+					e.AreaStreet.AddWord(s) //加入街道敏感词
+					dtmp := e.StreetDistrictMap[s]
 					if dtmp == nil {
-						StreetDistrictMap[s] = d
+						e.StreetDistrictMap[s] = d
 					}
 				}
 			}
 		}
 	}
-
-	//加载简称
-	AreaSimGet = DFA{}
-	for k, v := range CitySimConfig {
+	//初始化城市简称
+	fn3 := InitCitySim(e.TaskInfo.Version)
+	e.AreaSimGet = DFA{}
+	for k, v := range fn3 {
 		pb := v["brief"].(string)
-		p := ProvinceBrief[pb]
+		p := e.ProvinceBrief[pb]
 		//加载
 		for _, ss := range []string{k, pb} {
-			cs := AreaToCity[ss]
+			cs := e.AreaToCity[ss]
 			if cs != nil {
 				cs = append(cs, p.Captial)
 			} else {
 				cs = []*City{p.Captial}
 			}
-			AreaToCity[ss] = cs
-			AreaSimGet.AddWord(ss) //省全称和省简称
+			e.AreaToCity[ss] = cs
+			e.AreaSimGet.AddWord(ss) //省全称和省简称
 		}
 		city, _ := v["city"].(map[string]interface{})
 		for k1, v1 := range city {
@@ -525,40 +558,40 @@ func InitDFA() {
 			if v1m["brief"] == nil {
 			}
 			cb := v1m["brief"].(string)
-			c := AreaToCity[k1][0]
+			c := e.AreaToCity[k1][0]
 			//加入到城市map中
 			for _, ss := range []string{cb, k + cb, pb + cb} { //杭州 浙江省杭州  浙江杭州
-				AreaSimGet.AddWord(ss)
-				cs := AreaToCity[ss]
+				e.AreaSimGet.AddWord(ss)
+				cs := e.AreaToCity[ss]
 				if cs != nil {
 					cs = append(cs, c)
 				} else {
 					cs = []*City{c}
 				}
-				AreaToCity[ss] = cs
+				e.AreaToCity[ss] = cs
 			}
 			arr := v1m["area"].([]interface{})
 			for _, k2 := range arr {
 				s := k2.(string)
 				for n, ss := range []string{s, cb + s, pb + s, k + s} { //淳安 杭州淳安 浙江淳安 浙江省淳安
-					cs := AreaToCity[ss]
-					AreaSimGet.AddWord(ss)
+					cs := e.AreaToCity[ss]
+					e.AreaSimGet.AddWord(ss)
 					if cs != nil {
 						cs = append(cs, c)
 					} else {
 						cs = []*City{c}
 					}
-					AreaToCity[ss] = cs
+					e.AreaToCity[ss] = cs
 
 					//只加入简称
 					if n == 0 {
 						d := &District{}
 						d.Name = ss
 						d.C = c
-						AreaDistrict.AddWord(ss) //加入区或县简称敏感词
-						ctmp := DistrictCityMap[ss]
+						e.AreaDistrict.AddWord(ss) //加入区或县简称敏感词
+						ctmp := e.DistrictCityMap[ss]
 						if ctmp == nil {
-							DistrictCityMap[ss] = c
+							e.DistrictCityMap[ss] = c
 						}
 					}
 
@@ -648,39 +681,39 @@ func (e *ExtractTask) BidSave() {
 	}
 }
 
-func InitAuditRecogField() {
+func (e *ExtractTask) InitAuditRecogField() {
 	defer qu.Catch()
-	RecogFieldMap = make(map[string]map[string]interface{})
+	e.RecogFieldMap = make(map[string]map[string]interface{})
 	recogFieldList, _ := db.Mgo.Find("rc_field", `{"delete":false}`, `{"_id":1}`, `{"s_recogfield":1,"s_recogfield_prerule":1}`, false, -1, -1)
 	for _, f := range *recogFieldList {
 		field := qu.ObjToString(f["s_recogfield"])
-		RecogFieldMap[field] = f
+		e.RecogFieldMap[field] = f
 	}
 }
 
-func InitAuditClass() {
+func (e *ExtractTask) InitAuditClass() {
 	defer qu.Catch()
-	FidClassMap = make(map[string][]map[string]interface{})
+	e.FidClassMap = make(map[string][]map[string]interface{})
 	class, _ := db.Mgo.Find("rc_class", `{"delete":false}`, `{"i_order":1}`, nil, false, -1, -1)
 	for _, c := range *class {
 		classList := []map[string]interface{}{}
 		fid := qu.ObjToString(c["s_fid"])
-		if len(FidClassMap[fid]) > 0 { //追加
-			classList = FidClassMap[fid]
+		if len(e.FidClassMap[fid]) > 0 { //追加
+			classList = e.FidClassMap[fid]
 		}
 		classList = append(classList, c)
-		FidClassMap[fid] = classList
+		e.FidClassMap[fid] = classList
 	}
 }
 
 //加载规则
-func InitAuditRule() {
+func (e *ExtractTask) InitAuditRule() {
 	defer qu.Catch()
 	var rureg *regexp.Regexp
 	var rs []rune
 	var ru string
 	var err error
-	CidRuleMap = make(map[string][]map[string]interface{})
+	e.CidRuleMap = make(map[string][]map[string]interface{})
 	rule, _ := db.Mgo.Find("rc_rule", `{"delete":false}`, `{"i_order":1}`, nil, false, -1, -1)
 	for _, v := range *rule {
 		i_rule := []interface{}{}
@@ -704,11 +737,10 @@ func InitAuditRule() {
 
 		ruleList := []map[string]interface{}{}
 		classid := qu.ObjToString(v["s_classid"])
-		if len(CidRuleMap[classid]) > 0 { //追加
-			ruleList = CidRuleMap[classid]
-
+		if len(e.CidRuleMap[classid]) > 0 { //追加
+			ruleList = e.CidRuleMap[classid]
 		}
 		ruleList = append(ruleList, v)
-		CidRuleMap[classid] = ruleList
+		e.CidRuleMap[classid] = ruleList
 	}
 }

+ 70 - 50
src/jy/extract/extractcity.go

@@ -2,6 +2,7 @@ package extract
 
 import (
 	"fmt"
+	"log"
 	qu "qfw/util"
 	"strings"
 )
@@ -39,57 +40,45 @@ type DFA struct {
 }
 
 var SortField []string
-var (
-	AreaGet         DFA //市全称
-	AreaDistrict    DFA //区或县
-	AreaProvinceGet DFA //省
-	AreaSimGet      DFA //市简称
-	AreaStreet      DFA //街道
-)
-var CitySimConfig map[string]map[string]interface{} = make(map[string]map[string]interface{}) //城市简称
-var CityAllConfig map[string]map[string]interface{} = make(map[string]map[string]interface{}) //城市全称
-var ProviceConfig map[string]interface{} = make(map[string]interface{})                       //省份
-var ProvinceMap map[string]string = make(map[string]string)
-var CityBrief map[string]*City = make(map[string]*City)             //只加载一次即可
-var ProvinceBrief map[string]*Province = make(map[string]*Province) //只加载一次
-var AreaToCity map[string][]*City = make(map[string][]*City)        //两个文件共用
-var DistrictCityMap map[string]*City = make(map[string]*City)
-var StreetDistrictMap map[string]*District = make(map[string]*District)
 
 func init() {
 	qu.ReadConfig("./extractcity.json", &SortField)
 }
-func TransmitData(resulttmp map[string]interface{}, id string) (bres bool, p, c, d string) {
+func (e *ExtractTask) TransmitData(resulttmp map[string]interface{}, id string) (bres bool, p, c, d string) {
 	province := fmt.Sprint(resulttmp["area"])
 	city := fmt.Sprint(resulttmp["city"])
-	field := make([]string, 0)
+	fieldval := make([]string, 0)
 	for _, f := range SortField { //
 		val := resulttmp[f]
 		if val == nil {
-			field = append(field, "")
+			fieldval = append(fieldval, "")
 		} else {
-			field = append(field, fmt.Sprint(val))
+			fieldval = append(fieldval, fmt.Sprint(val))
 		}
 	}
-	bres, c, p = ExtractProvinceCity(province, city, id, field) //抽取省和市
-	bres, p, c, d = ExtractDistrict(field, bres, c, p, id)      //抽取区或县
+	bres, c, p = e.ExtractProvinceCity(province, city, id, fieldval) //抽取省和市
+	bres, p, c, d = e.ExtractDistrict(fieldval, bres, c, p, id)      //抽取区或县
 	return
 }
 
 //抽取区或县(从配置的字段信息中抽取区或县)
-func ExtractDistrict(field []string, bres bool, c, p, id string) (bool, string, string, string) {
+func (e *ExtractTask) ExtractDistrict(field []string, bres bool, c, p, id string) (bool, string, string, string) {
 	d := ""
 	for _, str := range field {
-		for pos, GET := range []DFA{AreaDistrict, AreaStreet} { //先匹配区或县再匹配街道
+		//log.Println("field===========", str)
+		for pos, GET := range []DFA{e.AreaDistrict, e.AreaStreet} { //先匹配区或县再匹配街道
 			word := GET.CheckSensitiveWord(str)
+			//log.Println("word================", word)
 			if word != "" {
 				if pos == 0 { //区或县匹配
 					//log.Println("县直接匹配到====", word)
-					city := DistrictCityMap[word]
+					city := e.DistrictCityMap[word]
+					//log.Println("city================", city)
 					if city != nil {
 						d = word
 						ctmp := city.Brief
 						ptmp := city.P.Brief
+						//log.Println("ctmpptmp================", ptmp, ctmp)
 						if !bres { //城市省份没有抽到,通过区或县定位市和省
 							c = ctmp
 							p = ptmp
@@ -105,11 +94,13 @@ func ExtractDistrict(field []string, bres bool, c, p, id string) (bool, string,
 					}
 				} else { //街道匹配
 					//log.Println("匹配到街道====", word)
-					district := StreetDistrictMap[word]
+					district := e.StreetDistrictMap[word]
+					//log.Println("district================", district)
 					if district != nil {
 						d = district.Name
 						ctmp := district.C.Brief
 						ptmp := district.C.P.Brief
+						//log.Println("districtptmp================", ctmp, ptmp)
 						if !bres { //城市省份没有抽到,通过区或县定位市和省
 							c = ctmp
 							p = ptmp
@@ -131,15 +122,23 @@ func ExtractDistrict(field []string, bres bool, c, p, id string) (bool, string,
 }
 
 //抽取城市、省份
-func ExtractProvinceCity(province, city, id string, field []string) (bres bool, c, p string) {
+func (e *ExtractTask) ExtractProvinceCity(province, city, id string, text []string) (bres bool, c, p string) {
 	defer qu.Catch()
 	bc := true //是否继续抽取
 	if city != "" {
-		if CityBrief[city] == nil { //简称不存在
-			//log.Println("city err:", city, id)
+		lock.Lock()
+		citybrief := e.CityBrief[city]
+		//log.Println("citybrief========", citybrief)
+		lock.Unlock()
+		if citybrief == nil { //简称不存在
+			log.Println("city err:", city, id)
 		} else { //简称存在
-			if province != CityBrief[city].P.Brief { //省份不配对
-				//log.Println("province err:", city, province, id)
+			lock.Lock()
+			pbrief := e.CityBrief[city].P.Brief
+			//log.Println("pbrief========", pbrief)
+			lock.Unlock()
+			if province != pbrief { //省份不配对
+				log.Println("province err:", city, province, id)
 			} else {
 				bc = false
 				//城市省份都正确
@@ -148,13 +147,19 @@ func ExtractProvinceCity(province, city, id string, field []string) (bres bool,
 	}
 	//有省份
 	bp := false
-	if ProvinceBrief[province] != nil { //省份简称正确
+	lock.Lock()
+	provincebrief := e.ProvinceBrief[province]
+	//log.Println("provincebrief========", provincebrief)
+	lock.Unlock()
+	if provincebrief != nil { //省份简称正确
 		bp = true
 	} else { //没有省份,先识别省份
-		for _, str := range field { //没有省的简称,从配置的字段信息中抽取省
-			word := AreaProvinceGet.CheckSensitiveWord(str) //省全称DFA中匹配
+		for _, str := range text { //没有省的简称,从配置的字段信息中抽取省
+			word := e.AreaProvinceGet.CheckSensitiveWord(str) //省全称DFA中匹配
 			if word != "" {
-				province = ProvinceMap[word] //
+				lock.Lock()
+				province = e.ProvinceMap[word]
+				lock.Unlock()
 				bp = true
 				break
 			}
@@ -162,10 +167,9 @@ func ExtractProvinceCity(province, city, id string, field []string) (bres bool,
 	}
 	//匹配城市
 	if bc { //城市简称不存在CityBrief[city]==nil,或城市简称存在但省份不配对,继续抽取
-		//目前是全匹配模式,如果再加上精简匹配,加一层循环
-		for pos, GET := range []DFA{AreaGet, AreaSimGet} { //AreaGet市全称,AreaSimGet省全称和简称
+		for pos, GET := range []DFA{e.AreaGet, e.AreaSimGet} { //AreaGet市全称,AreaSimGet省全称和简称
 			ws := make([]string, 5)
-			for n, str := range field {
+			for n, str := range text {
 				if str != "" {
 					word := GET.CheckSensitiveWord(str)
 					if pos == 1 { //用简称 后辍为路、集团替换
@@ -176,7 +180,9 @@ func ExtractProvinceCity(province, city, id string, field []string) (bres bool,
 					}
 					ws[n] = word
 					if word != "" {
-						res := AreaToCity[word]
+						lock.Lock()
+						res := e.AreaToCity[word]
+						lock.Unlock()
 						if len(res) == 1 {
 							//判断省份
 							if !bp || province == res[0].P.Brief { //省份不存在或一致直接返回(!bp:省的简称)
@@ -186,7 +192,7 @@ func ExtractProvinceCity(province, city, id string, field []string) (bres bool,
 								break
 							} else { //不一致时。。暂时不处理
 							}
-						} else { //多个时(出现这种情况是多个省中的市,市名相同)
+						} else { //多个时(出现这种情况是多个省中的市,市名相同。现在的配置文件中已经将市名,县名重复的全部去掉)
 						}
 					}
 				}
@@ -194,7 +200,9 @@ func ExtractProvinceCity(province, city, id string, field []string) (bres bool,
 			if !bres { //没有匹配到
 				mc := map[string]int{}
 				for _, w := range ws {
-					res := AreaToCity[w]
+					lock.Lock()
+					res := e.AreaToCity[w]
+					lock.Unlock()
 					for _, ct := range res {
 						if ct == nil {
 							continue
@@ -218,16 +226,22 @@ func ExtractProvinceCity(province, city, id string, field []string) (bres bool,
 				}
 				if v != "" {
 					bres = true
-					c = CityBrief[v].Brief
-					p = CityBrief[v].P.Brief
+					lock.Lock()
+					ctb := e.CityBrief[v]
+					lock.Unlock()
+					c = ctb.Brief
+					p = ctb.P.Brief
 				} else if len(mc) > 0 {
 					//取级别更大的
 					v := ""
 					for mk, _ := range mc {
-						if CityBrief[mk].P.Cap == mk {
+						lock.Lock()
+						cb := e.CityBrief[mk]
+						lock.Unlock()
+						if cb.P.Cap == mk {
 							bres = true
-							c = CityBrief[mk].Brief
-							p = CityBrief[mk].P.Brief
+							c = cb.Brief
+							p = cb.P.Brief
 							break
 						} else {
 							v = mk
@@ -235,8 +249,11 @@ func ExtractProvinceCity(province, city, id string, field []string) (bres bool,
 					}
 					if !bres {
 						bres = true
-						c = CityBrief[v].Brief
-						p = CityBrief[v].P.Brief
+						lock.Lock()
+						cbb := e.CityBrief[v]
+						c = cbb.Brief
+						p = cbb.P.Brief
+						lock.Unlock()
 					}
 				}
 			}
@@ -249,9 +266,12 @@ func ExtractProvinceCity(province, city, id string, field []string) (bres bool,
 	}
 	if !bres {
 		//取默认省会
-		if ProvinceBrief[province] != nil {
+		lock.Lock()
+		pbp := e.ProvinceBrief[province]
+		lock.Unlock()
+		if pbp != nil {
 			bres = true
-			c = ProvinceBrief[province].Cap
+			c = pbp.Cap
 			p = province
 		}
 	}

+ 8 - 5
src/jy/extract/extractudp.go

@@ -93,11 +93,14 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 	ext.InitRuleCore()
 	ext.InitTag()
 	ext.InitClearFn()
-
-	ext.InitProvince()
-	ext.InitCityAll()
-	ext.InitCitySim()
-	InitDFA()
+	if ext.IsExtractCity { //版本上控制是否开始城市抽取
+		//初始化城市DFA信息
+		ext.InitDFA()
+	}
+	//质量审核
+	ext.InitAuditRule()
+	ext.InitAuditClass()
+	ext.InitAuditRecogField()
 
 	go ext.ResultSave()
 	go ext.BidSave()

+ 0 - 8
src/main_test.go

@@ -51,14 +51,6 @@ func Test_paths(t *testing.T) {
 	}
 }
 
-func Test_city(t *testing.T) {
-	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
-	extract.StartExtractTaskId("5b8f804025e29a290415aee1")
-	log.Println(len(extract.ProviceConfig), len(extract.CityAllConfig), len(extract.CitySimConfig))
-	extract.InitDFA()
-	time.Sleep(300 * time.Second)
-}
-
 func Test_arr(t *testing.T) {
 	var DistrictToCity map[string]interface{} = make(map[string]interface{})
 	log.Println(DistrictToCity["a"])

+ 1 - 0
src/web/templates/admin/audit_classlist.html

@@ -127,6 +127,7 @@ $(function () {
 					_tit="新增_"+fname+"分类";
 				}else{
 					_tit="编辑_"+fname+"分类";
+					tag[0]={label:"名称",s_label:"s_name",disabled:true}
 				}
 				htmlObj={
 					mutilfield:"s_pid",

+ 1 - 1
src/web/templates/admin/audit_recogfield.html

@@ -112,7 +112,7 @@ $(function () {
 								if (bcon){								
 									$.post("/admin/rulemanager/saverecogfield",obj,function(data){
 										if(data&&data.rep){
-											window.location.href="/admin/audit/rulemanager"	
+											window.location.href="/admin/audit/recogfield"	
 										}else{
 											showTip(data.msg,1000)
 										}

+ 21 - 9
src/web/templates/admin/version.html

@@ -76,7 +76,7 @@ $(function () {
 				}
 				return tmp
 			}},
-			{"data":"_id","width":"30%",render:function(val,a,row){
+			{ "data":"_id","width":"30%",render:function(val,a,row){
 				tmp = '<div class="btn-group">'+
 					'<a class="btn btn-sm btn-primary" href="/admin/rulepre?version='+row.version+'">前置规则</a>'+
 					'<a class="btn btn-sm btn-info" href="/admin/ruleback?version='+row.version+'">后置规则</a>'+
@@ -85,8 +85,9 @@ $(function () {
 					'</div>';
 				return  tmp
 			}},
-			{"data":"_id",render:function(val,a,row){
+			{ "data":"_id",render:function(val,a,row){
 				return '<a class="btn btn-sm btn-success" href="/admin/version/info?vid='+val+'" >属性配置</a>'+
+					'&nbsp;&nbsp;<a class="btn btn-sm btn-primary opr" opr="edit">编&nbsp;&nbsp;辑</a>'+
 					'&nbsp;&nbsp;<a class="btn btn-sm btn-danger" href="#" onclick="del(\''+val+'\')">删&nbsp;&nbsp;除</a>'
 			}}
        	]
@@ -96,18 +97,24 @@ $(function () {
 			var n=$(this).attr("opr")
 			var _tit="",htmlObj={},obj,tag=[]
 			switch(n){
-			case "edit":			
+			case "edit":
 				obj=ttable.row($(this).closest("tr")).data();
 			case "new":
-				_tit="新增版本"
 				tag=[
 					{label:"版本名称",s_label:"version",placeholder:"版本名称",must:true},
 					{label:"描述",s_label:"s_descript",placeholder:"描述信息",must:true},
+					{label:"城市抽取",s_label:"isextractcity",type:"tpl_list_local",list:[{"s_name":"是","_id":true},{"s_name":"否","_id":false}],default:false},
 					{label:"克隆版本",s_label:"s_pversionid",type:"tpl_list_local",url:"/admin/getversions"},
 					{label:"克隆属性",s_label:"iscopyfiled",type:"tpl_list_local",list:[{"s_name":"是","_id":true},{"s_name":"否","_id":false}],default:false},
 					{s_label:"isuse",type:"tpl_hidden"},
+					{s_label:"_id",type:"tpl_hidden"}
 				]
-				obj={"isuse":false}
+				if(n == "new"){
+					_tit="新增版本";
+				}else{
+					_tit="编辑版本";
+					 tag[0]={label:"版本名称",s_label:"version",placeholder:"版本名称",must:true,disabled:true}
+				}
 				htmlObj={
 					title:_tit,
 					tag:tag,
@@ -124,12 +131,17 @@ $(function () {
 										return false
 									}
 								})
-								if (bcon){								
+								if (bcon){	
+									console.log(typeof(obj.isuse),obj.isuse)
+									if(obj.isuse != "true"){
+										obj.isuse = false
+									}
+									console.log(obj.isuse)
 									$.post("/admin/version/save",obj,function(data){
 										if(data&&data.rep){
 											window.location.href="/admin/version"			
 										}else{
-											showTip(data.msg,1000)
+											showTip("已存在!",1000)
 										}
 									},'json')
 								}else{
@@ -159,7 +171,7 @@ function use(_id,isuse){
 			data:{"_id":_id,"isuse":isuse},
 			success:function(r){
 				if(r.rep){				
-					ttable.ajax.reload();
+					window.location.href="/admin/version"	
 				}else{
 					showTip("启用失败", 1000, function() {});
 				}
@@ -175,7 +187,7 @@ function del(_id){
 			data:{"_id":_id},
 			success:function(r){
 				if(r.rep){				
-					ttable.ajax.reload();
+					window.location.href="/admin/version"
 				}else{
 					showTip("删除失败", 1000, function() {});
 				}

+ 1 - 1
src/web/templates/admin/versioninfo.html

@@ -140,7 +140,7 @@ $(function () {
 										if(data&&data.rep){
 											window.location.href="/admin/version/info?vid={{.vid}}"			
 										}else{
-											showTip(data.msg,1000)
+											showTip("已存在!",1000)
 										}
 									},'json')
 								}else{