Browse Source

delete:true

unknown 6 years ago
parent
commit
6abed7f60d
5 changed files with 92 additions and 38 deletions
  1. 9 9
      src/jy/admin/tag.go
  2. 39 13
      src/jy/extract/extract.go
  3. 35 16
      src/jy/extract/extractInit.go
  4. 1 0
      src/jy/util/article.go
  5. 8 0
      src/main_test.go

+ 9 - 9
src/jy/admin/tag.go

@@ -38,7 +38,7 @@ func init() {
 //标签库列表
 func TagData(c *gin.Context) {
 	version, _ := c.GetPostForm("version")
-	data, _ := Mgo.Find("tag", `{"s_version":"`+version+`","delete":true}`, `{"_id":-1}`, nil, false, -1, -1)
+	data, _ := Mgo.Find("tag", `{"s_version":"`+version+`","delete":false}`, `{"_id":-1}`, nil, false, -1, -1)
 	for _, d := range *data {
 		timeStr := time.Unix(d["l_intime"].(int64), 0).Format(Date_Short_Layout)
 		d["l_intime"] = timeStr
@@ -52,7 +52,7 @@ func TagSave(c *gin.Context) {
 	version, _ := c.GetPostForm("version")
 	username, _ := c.GetPostForm("username")
 	tp, _ := c.GetPostForm("tp")
-	data, _ := Mgo.FindOne("tag", `{"s_tagname":"`+tagname+`","s_version":"`+version+`","delete":true}`)
+	data, _ := Mgo.FindOne("tag", `{"s_tagname":"`+tagname+`","s_version":"`+version+`","delete":false}`)
 	if len(*data) > 0 { //判重,防止添加name相同的标签
 		c.JSON(200, gin.H{"rep": false})
 		return
@@ -63,7 +63,7 @@ func TagSave(c *gin.Context) {
 		"s_creater": username,
 		"s_type":    tp,
 		"l_intime":  time.Now().Unix(),
-		"delete":    true,
+		"delete":    false,
 	}
 	b := Mgo.Save("tag", save)
 	if b != "" {
@@ -77,7 +77,7 @@ func TagSave(c *gin.Context) {
 func TagDel(c *gin.Context) {
 	_id, _ := c.GetPostForm("_id")
 	b := Mgo.Update("tag", `{"_id":"`+_id+`"}`, map[string]interface{}{
-		"$set": map[string]interface{}{"delete": false},
+		"$set": map[string]interface{}{"delete": true},
 	}, false, false)
 	data, _ := Mgo.Find("tagdetailinfo", `{"s_parentid":"`+_id+`"}`, `{"_id":-1}`, nil, false, -1, -1)
 	if len(*data) > 0 { //删除父标签下所有详细标签
@@ -85,7 +85,7 @@ func TagDel(c *gin.Context) {
 			cid := d["_id"].(bson.ObjectId).Hex()
 			//Mgo.Del("tagdetailinfo", `{"_id":"`+cid+`"}`)
 			Mgo.Update("tagdetailinfo", `{"_id":"`+cid+`"}`, map[string]interface{}{
-				"$set": map[string]interface{}{"delete": false},
+				"$set": map[string]interface{}{"delete": true},
 			}, false, false)
 		}
 	}
@@ -100,7 +100,7 @@ func TagDel(c *gin.Context) {
 func OneTagData(c *gin.Context) {
 	version, _ := c.GetPostForm("version")
 	parentid, _ := c.GetPostForm("parentid")
-	data, _ := Mgo.Find("tagdetailinfo", `{"s_version":"`+version+`","s_parentid":"`+parentid+`","delete":true}`, `{"_id":-1}`, nil, false, -1, -1)
+	data, _ := Mgo.Find("tagdetailinfo", `{"s_version":"`+version+`","s_parentid":"`+parentid+`","delete":false}`, `{"_id":-1}`, nil, false, -1, -1)
 	for _, d := range *data {
 		timeStr := time.Unix(d["l_intime"].(int64), 0).Format(Date_Short_Layout)
 		d["l_intime"] = timeStr
@@ -113,7 +113,7 @@ func OneTagCreate(c *gin.Context) {
 	name, _ := c.GetPostForm("name")
 	version, _ := c.GetPostForm("version")
 	parentid, _ := c.GetPostForm("parentid")
-	data, _ := Mgo.FindOne("tagdetailinfo", `{"s_name":"`+name+`","s_parentid":"`+parentid+`","s_version":"`+version+`","delete":true}`)
+	data, _ := Mgo.FindOne("tagdetailinfo", `{"s_name":"`+name+`","s_parentid":"`+parentid+`","s_version":"`+version+`","delete":false}`)
 	if len(*data) > 0 { //判重
 		c.JSON(200, gin.H{"rep": false})
 		return
@@ -140,7 +140,7 @@ func OneTagCreate(c *gin.Context) {
 		"l_intime":     time.Now().Unix(),
 		"content":      jsondata,
 		"s_parentid":   parentid,
-		"delete":       true,
+		"delete":       false,
 	}
 	b := Mgo.Save("tagdetailinfo", save)
 	if b != "" {
@@ -154,7 +154,7 @@ func OneTagCreate(c *gin.Context) {
 func OneTagDel(c *gin.Context) {
 	_id, _ := c.GetPostForm("_id")
 	b := Mgo.Update("tagdetailinfo", `{"_id":"`+_id+`"}`, map[string]interface{}{
-		"$set": map[string]interface{}{"delete": false},
+		"$set": map[string]interface{}{"delete": true},
 	}, false, false)
 	if b {
 		c.JSON(200, gin.H{"rep": true})

+ 39 - 13
src/jy/extract/extract.go

@@ -25,20 +25,21 @@ var (
 	TaskList  map[string]*ExtractTask                //任务列表
 	saveLimit = 200                                  //抽取日志批量保存
 
-	CitySimConfig   map[string]map[string]interface{} //城市简称
-	CityAllConfig   map[string]map[string]interface{} //城市全称
-	ProviceConfig   map[string]interface{}            //省份
-	ProvinceMap     map[string]string
-	CityBrief       map[string]*City     //只加载一次即可
-	ProvinceBrief   map[string]*Province //只加载一次
-	AreaToCity      map[string][]*City   //两个文件共用
-	AreaGet         DFA                  //敏感词
-	AreaProvinceGet DFA                  //敏感词
-	AreaSimGet      DFA                  //敏感词
+	AreaGet         DFA //敏感词
+	AreaProvinceGet DFA //敏感词
+	AreaSimGet      DFA //敏感词
 
 	Fields = `{"title":1,"detail":1,"contenthtml":1,"href":1,"site":1,"spidercode":1,"toptype":1,"area":1,"city":1}`
 )
 
+var CitySimConfig map[string]map[string]interface{} = make(map[string]map[string]interface{}) // city short names
+var CityAllConfig map[string]map[string]interface{} = make(map[string]map[string]interface{}) // city full names
+var ProviceConfig map[string]interface{} = make(map[string]interface{})                       // provinces
+var ProvinceMap map[string]string = make(map[string]string) // written by InitDFA: ProviceConfig entry -> its key, CityAllConfig key -> its "brief"
+var CityBrief map[string]*City = make(map[string]*City)             // only needs to be loaded once
+var ProvinceBrief map[string]*Province = make(map[string]*Province) // loaded only once
+var AreaToCity map[string][]*City = make(map[string][]*City)        // shared by two files
+
 //启动测试抽取
 func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bool {
 	defer qu.Catch()
@@ -97,6 +98,11 @@ func StartExtractTaskId(taskId string) bool {
 	ext.InitRuleCore()
 	ext.InitTag()
 	ext.InitClearFn()
+
+	ext.InitProvince()
+	ext.InitCityAll()
+	ext.InitCitySim()
+
 	return true
 	ext.IsRun = true
 	if isgo {
@@ -181,6 +187,7 @@ func PreInfo(doc map[string]interface{}) *ju.Job {
 		City:       qu.ObjToString(doc["city"]),
 		Province:   qu.ObjToString(doc["area"]),
 		Result:     map[string][]*ju.ExtField{},
+		//BuyerAddr:  qu.ObjToString(doc["buyeraddr"]),
 	}
 	pretreated.AnalyStart(j)
 	return j
@@ -238,8 +245,8 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
 		//log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
 
 		//抽取省份城市县
-		fmt.Println("-----------", j.Province, j.City, j.Title)
-		//ExtractPC()
+		//fmt.Println("-----------", j.Province, j.City, j.BuyerAddr, j.Title) //j.Address
+		//ExtractPC(j.Result, j.Province, j.City, j.Title, j.BuyerAddr, j.SourceMid) //j.Address
 		//分析抽取结果并保存 todo
 		AnalysisSaveResult(j.Data, j.Result, e.TaskInfo)
 
@@ -762,6 +769,25 @@ func AnalysisSaveResult(doc *map[string]interface{}, result map[string][]*ju.Ext
 }
 
 //抽取城市、省份
+func ExtractPC2(result map[string][]*ju.ExtField, province, city, title, addr, sourcemid string) (bres bool, c, p string) { // Gathers the "projectname" and "buyer" values from result; nothing else is computed, so bres, c and p come back as zero values.
+	pjnarr := make([]string, 0)   // every "projectname" value found in result
+	buyerarr := make([]string, 0) // every "buyer" value found in result
+	for key, val := range result {
+		if key == "projectname" {
+			for _, v := range val {
+				p := fmt.Sprint(v.Value) // shadows the named result p; the outer p stays ""
+				pjnarr = append(pjnarr, p)
+			}
+		}
+		if key == "buyer" {
+			for _, v := range val {
+				b := fmt.Sprint(v.Value)
+				buyerarr = append(buyerarr, b)
+			}
+		}
+	}
+	return // naked return: bres, c and p are never assigned, and neither slice is read after being filled
+}
 func ExtractPC(buyer, projectname, title, city, province, addr string, id interface{}) (bres bool, c, p string) {
 	defer qu.Catch()
 	bc := true //是否继续抽取
@@ -792,7 +818,7 @@ func ExtractPC(buyer, projectname, title, city, province, addr string, id interf
 		}
 	}
 	//匹配城市
-	if bc {
+	if bc { //简称不存在CityBrief[city]==nil 或省份不对
 		//目前是全匹配模式,如果再加上精简匹配,加一层循环
 		for pos, GET := range []DFA{AreaGet, AreaSimGet} {
 			ws := make([]string, 5)

+ 35 - 16
src/jy/extract/extractInit.go

@@ -2,7 +2,6 @@
 package extract
 
 import (
-	"fmt"
 	db "jy/mongodbutil"
 	"log"
 	qu "qfw/util"
@@ -379,7 +378,13 @@ func (e *ExtractTask) InitProvince() {
 	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"province","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
 	for _, v := range *list {
 		name := qu.ObjToString(v["s_name"])
-		fn[name] = qu.ObjArrToStringArr(v["content"].([]interface{}))
+		content := v["content"]
+		switch content.(type) {
+		case string:
+			fn[name] = []interface{}{content.(string)}
+		case []interface{}:
+			fn[name] = content
+		}
 	}
 	ProviceConfig = fn
 }
@@ -401,9 +406,9 @@ func (e *ExtractTask) InitCitySim() {
 func (e *ExtractTask) InitCityAll() {
 	defer qu.Catch()
 	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"cityall","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
-	if len(*list) != 34 {
-		fmt.Println("加载城市配置文件出错", len(*list))
-	}
+	//	if len(*list) != 34 {
+	//		fmt.Println("加载城市配置文件出错", len(*list))
+	//	}
 	fn := map[string]map[string]interface{}{}
 	for _, v := range *list {
 		name := qu.ObjToString(v["s_name"])
@@ -414,25 +419,31 @@ func (e *ExtractTask) InitCityAll() {
 }
 
 //初始化城市省份敏感词
-func InitDfa() {
+func InitDFA() {
 	AreaGet = DFA{}
 	AreaProvinceGet = DFA{}
 	for k, v := range ProviceConfig {
+		log.Println(k, "----------", v)
 		for _, p := range v.([]interface{}) {
+			log.Println("ppppp", p)
 			p1, _ := p.(string)
 			AreaProvinceGet.AddWord(p1)
 			ProvinceMap[p1] = k
 		}
 	}
+	log.Println("ProvinceMap11----", ProvinceMap)
 	for k, v := range CityAllConfig {
-		AreaProvinceGet.AddWord(k)
+		AreaProvinceGet.AddWord(k) //省全称
 		p := &Province{}
 		p.Name = k
 		p.Brief = v["brief"].(string)
 		ProvinceMap[k] = p.Brief
+		log.Println("ProvinceMap22----", ProvinceMap)
 		ProvinceBrief[p.Brief] = p
 		p.Cap = v["captial"].(string)
+		log.Println("ProvinceBrief11====", p.Brief, ProvinceBrief[p.Brief].Name, ProvinceBrief[p.Brief].Brief, "==", ProvinceBrief[p.Brief].Cap)
 		city, _ := v["city"].(map[string]interface{})
+		log.Println("======================================================")
 		for k1, v1 := range city {
 			v1m, _ := v1.(map[string]interface{})
 			c := &City{}
@@ -447,15 +458,17 @@ func InitDfa() {
 			if c.Brief == p.Cap {
 				p.Captial = c
 			}
+			log.Println("CityBrief11+++", k1, "---", CityBrief[c.Brief].Name, CityBrief[c.Brief].Brief, "===", CityBrief[c.Brief].P.Captial, "===", CityBrief[c.Brief].P.Name)
 			//加入到城市map中
 			cs := AreaToCity[k1]
-			AreaGet.AddWord(k1)
+			AreaGet.AddWord(k1) //市全称
 			if cs != nil {
 				cs = append(cs, c)
 			} else {
 				cs = []*City{c}
 			}
 			AreaToCity[k1] = cs
+			log.Println("市---", k1, AreaToCity[k1][0].Brief, AreaToCity[k1][0].Name, AreaToCity[k1][0].P.Name)
 
 			/*
 				AreaToCity["衢州市"] = []interface{}{
@@ -471,25 +484,28 @@ func InitDfa() {
 			for _, k2 := range arr {
 				s := k2.(string)
 				cs := AreaToCity[s]
-				AreaGet.AddWord(s)
+				AreaGet.AddWord(s) //街道全称
 				if cs != nil {
 					cs = append(cs, c)
 				} else {
 					cs = []*City{c}
 				}
 				AreaToCity[s] = cs
+				log.Println("街道===", k2, AreaToCity)
 			}
 		}
 	}
+	log.Println("======================================================")
 	//加载简称
 	AreaSimGet = DFA{}
 	//util.ReadConfig("./city_sim.json", &CitySimConfig)
-	if len(CitySimConfig) != 34 {
-		log.Println("加载简称配置文件出错", len(CitySimConfig))
-	}
+	//	if len(CitySimConfig) != 34 {
+	//		log.Println("加载简称配置文件出错", len(CitySimConfig))
+	//	}
 	for k, v := range CitySimConfig {
 		pb := v["brief"].(string)
 		p := ProvinceBrief[pb]
+		log.Println("++++++++++++++++++", p)
 		//加载
 		for _, ss := range []string{k, pb} {
 			cs := AreaToCity[ss]
@@ -499,7 +515,8 @@ func InitDfa() {
 				cs = []*City{p.Captial}
 			}
 			AreaToCity[ss] = cs
-			AreaSimGet.AddWord(ss)
+			log.Println("+++", ss, AreaToCity)
+			AreaSimGet.AddWord(ss) //省全称和省简称
 		}
 		city, _ := v["city"].(map[string]interface{})
 		for k1, v1 := range city {
@@ -510,7 +527,7 @@ func InitDfa() {
 			cb := v1m["brief"].(string)
 			c := AreaToCity[k1][0]
 			//加入到城市map中
-			for _, ss := range []string{cb, k + cb, pb + cb} {
+			for _, ss := range []string{cb, k + cb, pb + cb} { //杭州 浙江省杭州  浙江杭州
 				AreaSimGet.AddWord(ss)
 				cs := AreaToCity[ss]
 				if cs != nil {
@@ -519,11 +536,12 @@ func InitDfa() {
 					cs = []*City{c}
 				}
 				AreaToCity[ss] = cs
+				log.Println("+-+-", ss, AreaToCity)
 			}
 			arr := v1m["area"].([]interface{})
 			for _, k2 := range arr {
 				s := k2.(string)
-				for _, ss := range []string{s, cb + s, pb + s, k + s} {
+				for _, ss := range []string{s, cb + s, pb + s, k + s} { //淳安 杭州淳安 浙江淳安 浙江省淳安
 					cs := AreaToCity[ss]
 					AreaSimGet.AddWord(ss)
 					if cs != nil {
@@ -532,10 +550,12 @@ func InitDfa() {
 						cs = []*City{c}
 					}
 					AreaToCity[ss] = cs
+					log.Println("-+-+", ss, AreaToCity)
 				}
 			}
 		}
 	}
+	log.Println(AreaToCity)
 }
 
 func (d *DFA) AddWord(keys ...string) {
@@ -568,7 +588,6 @@ func (d *DFA) AddWordAll(haskey bool, keys ...string) {
 	}
 }
 
-//匹配最长
 func (d *DFA) CheckSensitiveWord(src string) string {
 	pos := 0
 	nowMap := &d.Link

+ 1 - 0
src/jy/util/article.go

@@ -14,6 +14,7 @@ type Job struct {
 	Data       *map[string]interface{} //数据库源数据
 	Block      []*Block                //分块
 	Result     map[string][]*ExtField  //结果
+	//BuyerAddr  string                  //采购单位地址
 }
 
 type ExtField struct {

+ 8 - 0
src/main_test.go

@@ -42,3 +42,11 @@ func Test_paths(t *testing.T) {
 		break
 	}
 }
+
+func Test_city(t *testing.T) { // Manual smoke run of the area-dictionary setup; it logs values but makes no assertions.
+	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf") // hard-coded address and name; presumably a database connection — verify against MgoFactory
+	extract.StartExtractTaskId("5b8f804025e29a290415aee1")
+	log.Println(len(extract.ProviceConfig), len(extract.CityAllConfig), len(extract.CitySimConfig)) // sizes of the three config maps
+	extract.InitDFA()
+	time.Sleep(300 * time.Second) // keeps the test alive for 300 seconds after InitDFA returns
+}