fengweiqiang 5 years ago
parent
commit
b8258ddbb6

+ 41 - 15
src/jy/admin/rule.go

@@ -62,9 +62,9 @@ func init() {
 		_id, _ := c.GetPostForm("_id")
 		//b := Mgo.Del("rule_logic", `{"_id":"`+_id+`"}`)
 		b := Mgo.UpdateById("rule_logic", _id, `{"$set":{"delete":true}}`)
-		go DelLogicPre(_id,false)
-		go DelLogicCore(_id,false)
-		go DelLogicBack(_id,false)
+		go DelLogicPre(_id, false)
+		go DelLogicCore(_id, false)
+		go DelLogicBack(_id, false)
 		c.JSON(200, gin.H{"rep": b})
 	})
 	Admin.POST("/rulelogic/use", RuleLogicUse)
@@ -477,11 +477,17 @@ func RuleLogicCoreUse(c *gin.Context) {
 	c.JSON(200, gin.H{"rep": b})
 }
 
-func DelVersionInfo(vid string) { //刪除属性配置
-	vInfo, _ := Mgo.Find("versioninfo", `{"vid":"`+vid+`","delete":false}`, nil, `{"_id":1}`, false, -1, -1)
-	Mgo.Update("versioninfo", `{"vid":"`+vid+`","delete":false}`, `{"$set":{"delete":true}}`, false, true)
+func DelVersionInfo(vid string, isSite bool) { // soft-deletes the attribute configs of version vid
+	var versioninfodb string // collection name, chosen by isSite
+	if isSite {
+		versioninfodb = "site_versioninfo"
+	} else {
+		versioninfodb = "versioninfo"
+	}
+	vInfo, _ := Mgo.Find(versioninfodb, `{"vid":"`+vid+`","delete":false}`, nil, `{"_id":1}`, false, -1, -1) // read the _ids first: the update below makes these rows stop matching "delete":false
+	Mgo.Update(versioninfodb, `{"vid":"`+vid+`","delete":false}`, `{"$set":{"delete":true}}`, false, true) // marks the rows delete=true instead of removing them
 	for _, v := range *vInfo {
-		DelLogic(vid, qu.BsonIdToSId((v)["_id"]), false)
+		DelLogic(vid, qu.BsonIdToSId((v)["_id"]), isSite) // pass each config id on to DelLogic with the same isSite flag
 	}
 }
 
@@ -552,19 +558,39 @@ func DelRulePre(s_version string) { //删除通用前置过滤
 	Mgo.Update("rule_pre", `{"s_version":"`+s_version+`","delete":false}`, `{"$set":{"delete":true}}`, false, true)
 }
 
-func DelRuleBack(s_version string) { //删除通用后置过滤
-	Mgo.Update("rule_back", `{"s_version":"`+s_version+`","delete":false}`, `{"$set":{"delete":true}}`, false, true)
+func DelRuleBack(s_version string, isSite bool) { // soft-deletes the common post-filter rules of s_version
+	var rule_backdb string // collection name, chosen by isSite
+	if isSite {
+		rule_backdb = "site_rule_back"
+	} else {
+		rule_backdb = "rule_back"
+	}
+	Mgo.Update(rule_backdb, `{"s_version":"`+s_version+`","delete":false}`, `{"$set":{"delete":true}}`, false, true) // sets delete=true on the matching rows
 }
-func DelTag(s_version string) {
-	tag, _ := Mgo.Find("tag", `{"s_version":"`+s_version+`","delete":false}`, nil, `{"_id":1}`, false, -1, -1)
-	Mgo.Update("tag", `{"s_version":"`+s_version+`","delete":false}`, `{"$set":{"delete":true}}`, false, true)
+func DelTag(s_version string, isSite bool) { // soft-deletes the tags of s_version together with their detail rows
+	var tagdb, tagdetailinfodb string // parent and detail collection names, chosen by isSite
+	if isSite {
+		tagdb = "site_tag"
+		tagdetailinfodb = "site_tagdetailinfo"
+	} else {
+		tagdb = "tag"
+		tagdetailinfodb = "tagdetailinfo"
+	}
+	tag, _ := Mgo.Find(tagdb, `{"s_version":"`+s_version+`","delete":false}`, nil, `{"_id":1}`, false, -1, -1) // read the parent _ids before the update below flags them
+	Mgo.Update(tagdb, `{"s_version":"`+s_version+`","delete":false}`, `{"$set":{"delete":true}}`, false, true) // sets delete=true on the parent tags
 	for _, t := range *tag {
 		s_parentid := qu.BsonIdToSId(t["_id"])
-		Mgo.Update("tagdetailinfo", `{"s_version":"`+s_version+`","s_parentid":"`+s_parentid+`","delete":false}`, `{"$set":{"delete":true}}`, false, true)
+		Mgo.Update(tagdetailinfodb, `{"s_version":"`+s_version+`","s_parentid":"`+s_parentid+`","delete":false}`, `{"$set":{"delete":true}}`, false, true) // sets delete=true on the detail rows whose s_parentid is this tag
 	}
 }
-func DelCleanUp(s_version string) {
-	Mgo.Update("cleanup", `{"s_version":"`+s_version+`","delete":false}`, `{"$set":{"delete":true}}`, false, true)
+func DelCleanUp(s_version string, isSite bool) { // soft-deletes the cleanup functions of s_version
+	var cleanupdb string // collection name, chosen by isSite
+	if isSite {
+		cleanupdb = "site_cleanup"
+	} else {
+		cleanupdb = "cleanup"
+	}
+	Mgo.Update(cleanupdb, `{"s_version":"`+s_version+`","delete":false}`, `{"$set":{"delete":true}}`, false, true) // sets delete=true on the matching rows
 }
 
 func ClearRuleLogicData(c *gin.Context) {

+ 9 - 6
src/jy/admin/rulecheck.go

@@ -307,7 +307,7 @@ func checkCoreReg(field, content, ruleText string) map[string]string {
 //lua脚本前置过滤验证
 func checkPreScript(code, name, infoid, script string) map[string]interface{} {
 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j, _ := extract.PreInfo(*doc)
+	j, _,_ := extract.PreInfo(*doc)
 	delete(*j.Data, "contenthtml")
 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
 	lua.Block = j.Block
@@ -323,11 +323,14 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 	e.IsRun = true
 	e.TaskInfo = &extract.TaskInfo{Version: qu.ObjToString((*vsion)["s_version"]), VersionId: qu.BsonIdToSId((*vsion)["_id"]), TestLua: true}
 	e.InitRulePres()
-	e.InitRuleBacks()
-	e.InitRuleCore()
-	e.InitTag()
+	e.InitRuleBacks(false)
+	e.InitRuleBacks(true)
+	e.InitRuleCore(false)
+	e.InitRuleCore(true)
+	e.InitTag(false)
+	e.InitTag(true)
 	tmp, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j, _ := extract.PreInfo(*tmp)
+	j, _,_ := extract.PreInfo(*tmp)
 	doc := *j.Data
 	//全局前置规则,结果覆盖doc属性
 	for _, v := range e.RulePres {
@@ -390,7 +393,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 //lua脚本抽取验证
 func checkCoreScript(code, name, infoid, script string) interface{} {
 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j, _ := extract.PreInfo(*doc)
+	j, _,_ := extract.PreInfo(*doc)
 	delete(*j.Data, "contenthtml")
 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
 	lua.Block = j.Block

+ 49 - 25
src/jy/admin/site_management.go

@@ -7,6 +7,7 @@ import (
 	"github.com/gin-contrib/sessions"
 	"github.com/gin-gonic/gin"
 	"gopkg.in/mgo.v2/bson"
+	"jy/extract"
 	. "jy/mongodbutil"
 	"net/http"
 	"net/url"
@@ -37,30 +38,48 @@ func init() {
 	}) //版本站点列表
 	Admin.POST("/site_management/byId", func(c *gin.Context) {
 		_id := c.PostForm("_id")
-		if  !bson.IsObjectIdHex(_id) {
+		if !bson.IsObjectIdHex(_id) {
 			c.JSON(400, gin.H{"rep": false})
 			return
 		}
 		data, _ := Mgo.FindById("site_management", _id, bson.M{})
-		c.JSON(200, gin.H{"rep": true,"data":data})
+		c.JSON(200, gin.H{"rep": true, "data": data})
 	})
 
 	//站点信息保存
 	Admin.POST("/site_management/save", func(c *gin.Context) {
 		_id, _ := c.GetPostForm("_id")
 		data := GetPostForm(c)
+		if data["site_name"] == nil || data["site_href"] == nil {
+			c.JSON(400, gin.H{"rep": false})
+			return
+		}
 		u, _ := url.Parse(data["site_href"].(string))
 		if u != nil && u.Scheme == "" {
 			data["site_href"] = "http://" + strings.TrimSpace(data["site_href"].(string))
 		}
-		if scripts,ok := data["site_script"].(string);ok{
-			data["site_script"] = strings.Split(scripts,",")
+		if scripts, ok := data["site_script"].(string); ok {
+			if _id != "" {
+				tt, _ := Mgo.FindById("site_management", _id, `{"site_script":1}`)
+				if dbv, ok := (*tt)["site_script"].([]interface{}); ok {
+					for _, vv := range dbv {
+						extract.Luacodes.Delete(vv)
+					}
+				} else if dbv, ok := (*tt)["site_script"].(interface{}); ok {
+					extract.Luacodes.Delete(dbv)
+				}
+			}
+			luacodes := strings.Split(scripts, ",")
+			for _, v := range luacodes {
+				extract.Luacodes.Store(v, true)
+			}
+			data["site_script"] = luacodes
 		}
-		if data["isuse"] == nil{
+		if data["isuse"] == nil {
 			data["isuse"] = "true"
 		}
-		if data["vid"] != nil && bson.IsObjectIdHex(data["vid"].(string)){
-			if aa,b:= Mgo.Find("version",bson.M{"_id":bson.ObjectIdHex(data["vid"].(string))},nil,bson.M{"version":1},true,-1,-1);b {
+		if data["vid"] != nil && bson.IsObjectIdHex(data["vid"].(string)) {
+			if aa, b := Mgo.Find("version", bson.M{"_id": bson.ObjectIdHex(data["vid"].(string))}, nil, bson.M{"version": 1}, true, -1, -1); b {
 				data["version"] = (*aa)[0]["version"]
 			}
 		}
@@ -68,10 +87,6 @@ func init() {
 			Mgo.UpdateById("site_management", _id, map[string]interface{}{"$set": data})
 			c.JSON(200, gin.H{"rep": true})
 		} else {
-			if data["site_name"] == nil || data["site_href"] == nil  {
-				c.JSON(400, gin.H{"rep": false})
-				return
-			}
 			Mgo.Save("site_management", data)
 			c.JSON(200, gin.H{"rep": true, "vid": data["vid"]})
 		}
@@ -79,10 +94,18 @@ func init() {
 	//站点信息删除
 	Admin.POST("/site_management/delete", func(c *gin.Context) {
 		_id, _ := c.GetPostForm("_id")
-		if _id == "" || !bson.IsObjectIdHex(_id)  {
+		if _id == "" || !bson.IsObjectIdHex(_id) {
 			c.JSON(400, gin.H{"rep": false})
 		} else {
-			Mgo.Del("site_management",bson.M{"_id":bson.ObjectIdHex(_id)})
+			tt, _ := Mgo.FindById("site_management", _id, `{"site_script":1}`) // load the stored site_script so its entries can be dropped from extract.Luacodes
+			if dbv, ok := (*tt)["site_script"].([]interface{}); ok { // NOTE(review): tt is dereferenced unchecked — confirm FindById never returns nil for a missing _id
+				for _, vv := range dbv {
+					extract.Luacodes.Delete(vv)
+				}
+			} else if dbv, ok := (*tt)["site_script"].(interface{}); ok { // any other non-nil value is treated as a single key
+				extract.Luacodes.Delete(dbv)
+			}
+			Mgo.Del("site_management", bson.M{"_id": bson.ObjectIdHex(_id)}) // the record is removed only after its Luacodes entries are dropped
 			c.JSON(200, gin.H{"rep": true})
 		}
 	})
@@ -99,7 +122,7 @@ func init() {
 		pid := c.Query("pid")
 		c.HTML(
 			http.StatusOK, "site_versioninfo.html",
-			gin.H{"vid": vid, "pid":pid},
+			gin.H{"vid": vid, "pid": pid},
 		)
 	})
 	//属性配置
@@ -107,7 +130,7 @@ func init() {
 		vid := c.PostForm("vid")
 		pid := c.PostForm("pid")
 		list, _ := Mgo.Find("site_versioninfo", `{"vid":"`+vid+`","pid":"`+pid+`","delete":false}`, `{"_id":-1}`, nil, false, -1, -1)
-		c.JSON(200, gin.H{"data": list, "vid": vid,"pid":pid})
+		c.JSON(200, gin.H{"data": list, "vid": vid, "pid": pid})
 	})
 	//属性保存
 	Admin.POST("/site_management/infosave", func(c *gin.Context) {
@@ -130,7 +153,7 @@ func init() {
 				pid := Mgo.Save("site_versioninfo", data)
 				fromvid, _ := data["s_pversionid"].(string)
 				if fromvid != "" {
-					copyFieldRules(vid, pid, s_field, fromvid, sessions.Default(c).Get("username").(string),true)
+					copyFieldRules(vid, pid, s_field, fromvid, sessions.Default(c).Get("username").(string), true)
 				}
 				c.JSON(200, gin.H{"rep": true})
 			}
@@ -148,7 +171,7 @@ func init() {
 		_id, _ := c.GetPostForm("_id")
 		b := Mgo.UpdateById("site_versioninfo", _id, `{"$set":{"delete":true}}`)
 		v, _ := Mgo.FindById("site_versioninfo", _id, `{"vid":1}`)
-		go DelLogic(util.ObjToString((*v)["vid"]), _id,true)
+		go DelLogic(util.ObjToString((*v)["vid"]), _id, true)
 		//b := Mgo.Del("site_versioninfo", `{"_id":"`+_id+`"}`)
 		c.JSON(200, gin.H{"rep": b})
 	})
@@ -158,7 +181,7 @@ func init() {
 		vid := c.Query("vid")
 		p_pid := c.Query("p_pid")
 		pid := c.Query("pid")
-		c.HTML(200, "site_rule_logiclist.html", gin.H{"vid": vid, "pid": pid,"p_pid":p_pid})
+		c.HTML(200, "site_rule_logiclist.html", gin.H{"vid": vid, "pid": pid, "p_pid": p_pid})
 	})
 	//逻辑页面数据展示
 	Admin.POST("/site_management/rulelogic/data", func(c *gin.Context) {
@@ -190,9 +213,9 @@ func init() {
 		_id, _ := c.GetPostForm("_id")
 		//b := Mgo.Del("rule_logic", `{"_id":"`+_id+`"}`)
 		b := Mgo.UpdateById("site_rule_logic", _id, `{"$set":{"delete":true}}`)
-		go DelLogicPre(_id,true)
-		go DelLogicCore(_id,true)
-		go DelLogicBack(_id,true)
+		go DelLogicPre(_id, true)
+		go DelLogicCore(_id, true)
+		go DelLogicBack(_id, true)
 		c.JSON(200, gin.H{"rep": b})
 	})
 	//逻辑页面启用按钮
@@ -353,7 +376,7 @@ func init() {
 	Admin.GET("/site_management/ruleback", func(c *gin.Context) {
 		version := c.Query("version")
 		vid := c.Query("vid")
-		c.HTML(200, "site_rule_backlist.html", gin.H{"version": version,"vid":vid})
+		c.HTML(200, "site_rule_backlist.html", gin.H{"version": version, "vid": vid})
 	})
 	Admin.POST("/site_management/ruleback/data", func(c *gin.Context) {
 		version, _ := c.GetPostForm("version")
@@ -400,7 +423,7 @@ func init() {
 	Admin.GET("/site_management/tag", func(c *gin.Context) {
 		version := c.Query("version")
 		vid := c.Query("vid")
-		c.HTML(200, "site_taglist.html", gin.H{"version": version,"vid":vid})
+		c.HTML(200, "site_taglist.html", gin.H{"version": version, "vid": vid})
 	})
 	Admin.POST("/site_management/tag/data", TagDataSite)
 	Admin.POST("/site_management/tag/save", TagSaveSite)
@@ -503,7 +526,7 @@ func init() {
 		} else {
 			c.JSON(200, gin.H{"rep": false})
 		}
-	})   //删除数据
+	}) //删除数据
 
 	//校验
 	//正则规则验证
@@ -819,6 +842,7 @@ func OneTagSearchSite(c *gin.Context) {
 }
 
 var sysn sync.RWMutex
+
 func getSyncIndex(code string) string {
 	tmp := ""
 	sysn.Lock()
@@ -829,4 +853,4 @@ func getSyncIndex(code string) string {
 		tmp = code + "_" + fmt.Sprint((*data)["index"])
 	}
 	return tmp
-}
+}

+ 53 - 29
src/jy/admin/version.go

@@ -62,10 +62,10 @@ func init() {
 				data["delete"] = false
 				vid := Mgo.Save("version", data)
 				if s_pversionid != "" {
-					copyComRules(version, s_pversionid, s_username)
-				}
-				if iscopysite{
-					scopySites()
+					copyComRules(version, s_pversionid, s_username, false)
+					if iscopysite {
+						scopySites(version,s_pversionid,s_username , true)
+					}
 				}
 				if iscopyfiled {
 					list, _ := Mgo.Find("versioninfo", `{"vid":"`+s_pversionid+`","delete":false}`, nil, nil, false, -1, -1)
@@ -79,7 +79,10 @@ func init() {
 						pid := Mgo.Save("versioninfo", v)
 						s_field := qu.ObjToString(v["s_field"])
 						//克隆属性配置
-						copyFieldRules(vid, pid, s_field, s_pversionid, s_username,false)
+						copyFieldRules(vid, pid, s_field, s_pversionid, s_username, false)
+						if iscopysite {
+							copyFieldRules(vid, pid, s_field, s_pversionid, s_username, true)
+						}
 					}
 					//克隆分包属性
 					list2, _ := Mgo.Find("pkg_info", `{"vid":"`+s_pversionid+`","delete":false}`, nil, nil, false, -1, -1)
@@ -136,17 +139,21 @@ func init() {
 		//删除属性配置中的后置规则
 
 		//刪除属性配置
-		go DelVersionInfo(_id)
+		go DelVersionInfo(_id,false)
+		go DelVersionInfo(_id,true)//站点
 		//删除分包配置
 		go DelPkgInfo(_id)
 		//删除版本的通用前置规则
 		go DelRulePre(s_version)
 		//删除版本的通用后置规则
-		go DelRuleBack(s_version)
+		go DelRuleBack(s_version,false)
+		go DelRuleBack(s_version,true)
 		//删除标签库
-		go DelTag(s_version)
+		go DelTag(s_version,false)
+		go DelTag(s_version,true)
 		//删除版本的通用前置规则
-		go DelCleanUp(s_version)
+		go DelCleanUp(s_version,false)
+		go DelCleanUp(s_version,true)
 		c.JSON(200, gin.H{"rep": b})
 	})
 	Admin.GET("/version/info", func(c *gin.Context) {
@@ -182,7 +189,7 @@ func init() {
 				pid := Mgo.Save("versioninfo", data)
 				fromvid, _ := data["s_pversionid"].(string)
 				if fromvid != "" {
-					copyFieldRules(vid, pid, s_field, fromvid, sessions.Default(c).Get("username").(string),false)
+					copyFieldRules(vid, pid, s_field, fromvid, sessions.Default(c).Get("username").(string), false)
 				}
 				c.JSON(200, gin.H{"rep": true})
 			}
@@ -198,7 +205,7 @@ func init() {
 		_id, _ := c.GetPostForm("_id")
 		b := Mgo.UpdateById("versioninfo", _id, `{"$set":{"delete":true}}`)
 		v, _ := Mgo.FindById("versioninfo", _id, `{"vid":1}`)
-		go DelLogic(qu.ObjToString((*v)["vid"]), _id,false)
+		go DelLogic(qu.ObjToString((*v)["vid"]), _id, false)
 		//b := Mgo.Del("versioninfo", `{"_id":"`+_id+`"}`)
 		c.JSON(200, gin.H{"rep": b})
 	})
@@ -494,11 +501,27 @@ func init() {
 }
 
 //克隆版本通用属性
-func copyComRules(version, pvid, s_username string) {
-	tmp, _ := Mgo.FindById("version", pvid, nil)
+func copyComRules(version, pvid, s_username string, isSite bool) {
+	var versiondb, rule_predb, rule_backdb, tagdb, tagdetailinfodb, cleanupdb string
+	if isSite {
+		versiondb = "site_version"
+		rule_predb = "site_rule_pre"
+		rule_backdb = "site_rule_back"
+		tagdb = "site_tag"
+		tagdetailinfodb = "site_tagdetailinfo"
+		cleanupdb = "site_cleanup"
+	} else {
+		versiondb = "version"
+		rule_predb = "rule_pre"
+		rule_backdb = "rule_back"
+		tagdb = "tag"
+		tagdetailinfodb = "tagdetailinfo"
+		cleanupdb = "cleanup"
+	}
+	tmp, _ := Mgo.FindById(versiondb, pvid, nil)
 	oldversion := (*tmp)["version"].(string)
 	//克隆前置规则
-	plist, _ := Mgo.Find("rule_pre", `{"s_version":"`+oldversion+`","delete":false}`, nil, nil, false, -1, -1)
+	plist, _ := Mgo.Find(rule_predb, `{"s_version":"`+oldversion+`","delete":false}`, nil, nil, false, -1, -1)
 	for _, v := range *plist {
 		delete(v, "_id")
 		v["s_version"] = version
@@ -506,10 +529,10 @@ func copyComRules(version, pvid, s_username string) {
 		v["s_username"] = s_username
 		v["l_createtime"] = time.Now().Unix()
 		v["l_lasttime"] = time.Now().Unix()
-		Mgo.Save("rule_pre", v)
+		Mgo.Save(rule_predb, v)
 	}
 	//克隆后置规则
-	blist, _ := Mgo.Find("rule_back", `{"s_version":"`+oldversion+`","delete":false}`, nil, nil, false, -1, -1)
+	blist, _ := Mgo.Find(rule_backdb, `{"s_version":"`+oldversion+`","delete":false}`, nil, nil, false, -1, -1)
 	for _, v := range *blist {
 		delete(v, "_id")
 		v["s_version"] = version
@@ -517,35 +540,35 @@ func copyComRules(version, pvid, s_username string) {
 		v["s_username"] = s_username
 		v["l_createtime"] = time.Now().Unix()
 		v["l_lasttime"] = time.Now().Unix()
-		Mgo.Save("rule_back", v)
+		Mgo.Save(rule_backdb, v)
 	}
 	//克隆tag
-	tlist, _ := Mgo.Find("tag", `{"s_version":"`+oldversion+`","delete":false}`, nil, nil, false, -1, -1)
+	tlist, _ := Mgo.Find(tagdb, `{"s_version":"`+oldversion+`","delete":false}`, nil, nil, false, -1, -1)
 	for _, v := range *tlist {
 		oldId := qu.BsonIdToSId(v["_id"])
 		delete(v, "_id")
 		v["s_version"] = version
 		v["s_creater"] = s_username
 		v["l_intime"] = time.Now().Unix()
-		newId := Mgo.Save("tag", v) //克隆父标签
-		td, _ := Mgo.Find("tagdetailinfo", `{"s_parentid":"`+oldId+`","delete":false}`, nil, nil, false, -1, -1)
+		newId := Mgo.Save(tagdb, v) //克隆父标签
+		td, _ := Mgo.Find(tagdetailinfodb, `{"s_parentid":"`+oldId+`","delete":false}`, nil, nil, false, -1, -1)
 		for _, v2 := range *td {
 			delete(v2, "_id")
 			v2["s_version"] = version
 			v2["s_creater"] = s_username
 			v2["l_intime"] = time.Now().Unix()
 			v2["s_parentid"] = newId
-			Mgo.Save("tagdetailinfo", v2) //克隆详细标签
+			Mgo.Save(tagdetailinfodb, v2) //克隆详细标签
 		}
 	}
 	//克隆函数
-	clist, _ := Mgo.Find("cleanup", `{"s_version":"`+oldversion+`","delete":false}`, nil, nil, false, -1, -1)
+	clist, _ := Mgo.Find(cleanupdb, `{"s_version":"`+oldversion+`","delete":false}`, nil, nil, false, -1, -1)
 	for _, v := range *clist {
 		delete(v, "_id")
 		v["s_version"] = version
 		v["s_creater"] = s_username
 		v["l_intime"] = time.Now().Unix()
-		Mgo.Save("cleanup", v)
+		Mgo.Save(cleanupdb, v)
 	}
 }
 
@@ -583,15 +606,15 @@ func copyPkgRules(vid, pid, s_field, oldvid, s_username string) {
 }
 
 //克隆版本Field
-func copyFieldRules(vid, pid, s_field, oldvid, s_username string,isSite bool) {
-	var versioninfo,rule_logic,rule_logicpre,rule_logicore,rule_logicback string
-	if isSite{
+func copyFieldRules(vid, pid, s_field, oldvid, s_username string, isSite bool) {
+	var versioninfo, rule_logic, rule_logicpre, rule_logicore, rule_logicback string
+	if isSite {
 		versioninfo = "site_versioninfo"
 		rule_logic = "site_rule_logic"
 		rule_logicpre = "site_rule_logicpre"
 		rule_logicore = "site_rule_logicore"
 		rule_logicback = "site_rule_logicback"
-	}else {
+	} else {
 		versioninfo = "versioninfo"
 		rule_logic = "rule_logic"
 		rule_logicpre = "rule_logicpre"
@@ -674,8 +697,9 @@ func copyClearRuleLogic(vid, pid, s_field, oldvid, s_username string) {
 
 }
 
-func scopySites()  {
-	
+// scopySites clones a version's common rules through copyComRules, forwarding isSite (the visible caller passes true to select the site_* collections).
+func scopySites(version, pvid, s_username string, isSite bool) {
+	copyComRules(version, pvid, s_username, isSite)
 }
 
 //获取代码

+ 12 - 7
src/jy/extract/exportask.go

@@ -52,10 +52,14 @@ func extractAndExport(v string, t map[string]interface{}) {
 	}
 	e.TaskInfo.FDB = db.MgoFactory(1, 3, 120, fmt.Sprint(t["dbaddr"]), fmt.Sprint(t["dbname"]))
 	e.InitRulePres()
-	e.InitRuleBacks()
-	e.InitRuleCore()
-	e.InitTag()
-	e.InitClearFn()
+	e.InitRuleBacks(false)
+	e.InitRuleBacks(true)
+	e.InitRuleCore(false)
+	e.InitRuleCore(true)
+	e.InitTag(false)
+	e.InitTag(true)
+	e.InitClearFn(false)
+	e.InitClearFn(true)
 	e.InfoTypeList()
 	e.InitBlockRule()
 	//品牌抽取是否开启
@@ -72,14 +76,15 @@ func extractAndExport(v string, t map[string]interface{}) {
 			continue
 		}
 		var j, jf *ju.Job
+		var isSite bool
 		if e.IsFileField && v["projectinfo"] != nil {
 			v["isextFile"] = true
-			j, jf = e.PreInfo(v)
+			j, jf,isSite = e.PreInfo(v)
 		} else {
-			j, _ = e.PreInfo(v)
+			j, _,isSite = e.PreInfo(v)
 		}
 		e.TaskInfo.ProcessPool <- true
-		go e.ExtractProcess(j, jf)
+		go e.ExtractProcess(j, jf,isSite)
 	}
 }
 

+ 11 - 11
src/jy/extract/extpackage.go

@@ -10,20 +10,20 @@ import (
 	"sort"
 )
 
-func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask) {
+func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask, isSite bool) { // runs kvparse on each non-nil KV set of pkg (colon, table, space)
 
 	if pkg.ColonKV != nil {
-		kvparse(pkg.ColonKV,  e, sonJobResult)
+		kvparse(pkg.ColonKV, e, sonJobResult, isSite)
 	}
 	if pkg.TableKV != nil {
-		kvparse(pkg.TableKV,  e, sonJobResult)
+		kvparse(pkg.TableKV, e, sonJobResult, isSite)
 	}
 	if pkg.SpaceKV != nil {
-		kvparse(pkg.SpaceKV,  e, sonJobResult)
+		kvparse(pkg.SpaceKV, e, sonJobResult, isSite)
 	}
 }
 
-func kvparse(p *ju.JobKv,  e *ExtractTask, sonJobResult *map[string]interface{}) {
+func kvparse(p *ju.JobKv,  e *ExtractTask, sonJobResult *map[string]interface{},isSite bool) {
 	if p != nil {
 		for pk, pv2 := range p.KvTags {
 			if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额") {
@@ -43,7 +43,7 @@ func kvparse(p *ju.JobKv,  e *ExtractTask, sonJobResult *map[string]interface{})
 			if len(pv) == 0 {
 				continue
 			}
-			tags := ju.GetTags(pk)
+			tags := ju.GetTags(pk,isSite)
 			if tags.Len() > 0 {
 				if ((*sonJobResult)["name"]  == nil || (*sonJobResult)["name"] == "")&& tags[0].Key == "项目名称"{
 					(*sonJobResult)["name"] = pv[0].Value
@@ -77,7 +77,7 @@ func kvparse(p *ju.JobKv,  e *ExtractTask, sonJobResult *map[string]interface{})
 }
 
 //处理分包信息
-func PackageDetail(j *ju.Job, e *ExtractTask) {
+func PackageDetail(j *ju.Job, e *ExtractTask,isSite bool) {
 	qu.Try(func() {
 		if len(j.BlockPackage) > 0 {
 			tmpkeys := []string{}
@@ -124,15 +124,15 @@ func PackageDetail(j *ju.Job, e *ExtractTask) {
 						}
 						sonJobResult["winnerorder"] = pkg.WinnerOrder
 					}
-					pkvdata(pkg, &sonJobResult, e)
+					pkvdata(pkg, &sonJobResult, e,isSite)
 
 					sonJobResult["type"] = pkg.Type
 					if len(tmpkeys) == 1{
 						if qu.Float64All(sonJobResult["budget"])==0{
 							for _,bv := range j.Block{
-								kvparse(bv.ColonKV,e,&sonJobResult)
-								kvparse(bv.TableKV,e,&sonJobResult)
-								kvparse(bv.SpaceKV,e,&sonJobResult)
+								kvparse(bv.ColonKV,e,&sonJobResult,isSite)
+								kvparse(bv.TableKV,e,&sonJobResult,isSite)
+								kvparse(bv.SpaceKV,e,&sonJobResult,isSite)
 							}
 						}
 					}

+ 85 - 42
src/jy/extract/extract.go

@@ -26,15 +26,16 @@ import (
 var (
 	lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
 
-	cut           = ju.NewCut()                          //获取正文并清理
-	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask                //任务列表
-	ClearTaskList map[string]*ClearTask                  //清理任务列表
-	saveLimit     = 100                                  //抽取日志批量保存
-	PageSize      = 5000                                 //查询分页
+	cut     = ju.NewCut()                          //获取正文并清理
+	ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList      map[string]*ExtractTask          //任务列表
+	ClearTaskList map[string]*ClearTask            //清理任务列表
+	saveLimit     = 100                            //抽取日志批量保存
+	PageSize      = 5000                           //查询分页
 	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
+var Luacodes = sync.Map{} // set of site script codes: the admin site_management save/delete handlers Store and Delete keys, PreInfo looks a doc's spidercode up here to decide isSite
 
 //启动测试抽取
 func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bool {
@@ -45,13 +46,17 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 	ext.InitTestTaskInfo(resultcoll, trackcoll)
 	ext.TaskInfo.FDB = db.MgoFactory(1, 3, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
 	ext.InitRulePres()
-	ext.InitRuleBacks()
-	ext.InitRuleCore()
+	ext.InitRuleBacks(false)
+	ext.InitRuleBacks(true)
+	ext.InitRuleCore(false)
+	ext.InitRuleCore(true)
 	ext.InitPkgCore()
 	ext.InitBlockRule()
 	ext.InfoTypeList()
-	ext.InitTag()
-	ext.InitClearFn()
+	ext.InitTag(false)
+	ext.InitTag(true)
+	ext.InitClearFn(false)
+	ext.InitClearFn(true)
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		ext.InitCityInfo()
@@ -89,14 +94,15 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 				continue
 			}
 			var j, jf *ju.Job
+			var isSite bool
 			if ext.IsFileField && v["projectinfo"] != nil {
 				v["isextFile"] = true
-				j, jf = ext.PreInfo(v)
+				j, jf, isSite = ext.PreInfo(v)
 			} else {
-				j, _ = ext.PreInfo(v)
+				j, _, isSite = ext.PreInfo(v)
 			}
 			ext.TaskInfo.ProcessPool <- true
-			go ext.ExtractProcess(j, jf)
+			go ext.ExtractProcess(j, jf, isSite)
 		}
 		return true
 	} else {
@@ -121,13 +127,17 @@ func StartExtractTaskId(taskId string) bool {
 	ext.TaskInfo.FDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
 	ext.TaskInfo.TDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.ToDbAddr, ext.TaskInfo.ToDB)
 	ext.InitRulePres()
-	ext.InitRuleBacks()
-	ext.InitRuleCore()
+	ext.InitRuleBacks(false)
+	ext.InitRuleBacks(true)
+	ext.InitRuleCore(false)
+	ext.InitRuleCore(true)
 	ext.InitPkgCore()
 	ext.InitBlockRule()
 	ext.InfoTypeList()
-	ext.InitTag()
-	ext.InitClearFn()
+	ext.InitTag(false)
+	ext.InitTag(true)
+	ext.InitClearFn(false)
+	ext.InitClearFn(true)
 	if ext.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		//ext.InitCityDFA()
@@ -200,14 +210,15 @@ func RunExtractTask(taskId string) {
 				break
 			}
 			var j, jf *ju.Job
+			var isSite bool
 			if ext.IsFileField && v["projectinfo"] != nil {
 				v["isextFile"] = true
-				j, jf = ext.PreInfo(v)
+				j, jf, isSite = ext.PreInfo(v)
 			} else {
-				j, _ = ext.PreInfo(v)
+				j, _, isSite = ext.PreInfo(v)
 			}
 			ext.TaskInfo.ProcessPool <- true
-			go ext.ExtractProcess(j, jf)
+			go ext.ExtractProcess(j, jf, isSite)
 			ext.TaskInfo.LastExtId = _id
 		}
 		db.Mgo.UpdateById("task", ext.Id, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`)
@@ -220,12 +231,12 @@ func RunExtractTask(taskId string) {
 }
 
 //信息预处理-不和版本关联,取最新版本的配置项
-func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
+func PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool) { // delegates to the method of the same name on an empty ExtractTask and returns its three results unchanged
 	return (&ExtractTask{}).PreInfo(doc)
 }
 
 //信息预处理-和版本关联
-func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
+func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool) {
 	defer qu.Catch()
 	//判断是否有附件这个字段
 	var isextFile bool
@@ -309,15 +320,17 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 			IsFile:     isextFile,
 		}
 	}
+	//是否配置站点
+	_, isSite = Luacodes.Load(qu.ObjToString(doc["spidercode"]))
 	qu.Try(func() {
-		pretreated.AnalyStart(j) //job.Block分块
+		pretreated.AnalyStart(j, isSite) //job.Block分块
 		if isextFile {
-			pretreated.AnalyStart(jf)
+			pretreated.AnalyStart(jf, isSite)
 		}
 	}, func(err interface{}) {
 		log.Debug("pretreated.AnalyStart", err, j.SourceMid)
 	})
-	return j, jf
+	return j, jf, isSite
 }
 
 //遍历附件字段内容,拼接在一起;附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
@@ -353,17 +366,17 @@ func file2text(doc *map[string]interface{}) {
 }
 
 //抽取
-func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
-	e.ExtractDetail(j)
+func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) { // extracts j (and jf when it is a file job), hands both to AnalysisSaveResult, then receives once from ProcessPool
+	e.ExtractDetail(j, isSite) // isSite is passed through unchanged to the extraction steps
 	if jf != nil && jf.IsFile {
-		e.ExtractFile(jf)
+		e.ExtractFile(jf, isSite) // attachment job, only when one was built
 	}
 	//分析抽取结果并保存 todo
 	AnalysisSaveResult(j, jf, e)
 	<-e.TaskInfo.ProcessPool
 }
 
-func (e *ExtractTask) ExtractDetail(j *ju.Job) {
+func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool) {
 	qu.Try(func() {
 		doc := *j.Data
 		//全局前置规则,结果覆盖doc属性
@@ -373,17 +386,35 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 		tmprules := map[string][]*RuleCore{}
 		lockrule.Lock()
 		if j.Category == "all" || j.CategorySecond == "all" {
-			for k, vc1 := range e.RuleCores["all_all"] {
-				tmprules[k] = vc1
+			if isSite {
+				for k, vc1 := range e.SiteRuleCores["all_all"] {
+					tmprules[k] = vc1
+				}
+			} else {
+				for k, vc1 := range e.RuleCores["all_all"] {
+					tmprules[k] = vc1
+				}
 			}
 		} else {
-			for k, vc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
-				tmprules[k] = vc1
+			if isSite {
+				for k, vc1 := range e.SiteRuleCores[j.Category+"_"+j.CategorySecond] {
+					tmprules[k] = vc1
+				}
+			} else {
+				for k, vc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
+					tmprules[k] = vc1
+				}
 			}
 		}
 		if len(tmprules) < 1 { //分类未覆盖部分
-			for k, vc1 := range e.RuleCores["all_all"] {
-				tmprules[k] = vc1
+			if isSite { // fallback mirrors the selection above: site docs use the site catch-all rules, all others the general ones
+				for k, vc1 := range e.SiteRuleCores["all_all"] {
+					tmprules[k] = vc1
+				}
+			} else {
+				for k, vc1 := range e.RuleCores["all_all"] {
+					tmprules[k] = vc1
+				}
 			}
 		}
 		lockrule.Unlock()
@@ -435,14 +466,26 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 			}
 		}
 		//全局后置规则
-		for _, v := range e.RuleBacks {
-			ExtRegBack(j, v, e.TaskInfo)
+		if isSite {
+			for _, v := range e.SiteRuleBacks {
+				ExtRegBack(j, v, e.TaskInfo)
+			}
+		} else {
+			for _, v := range e.RuleBacks {
+				ExtRegBack(j, v, e.TaskInfo)
+			}
 		}
 		//函数清理
 		for key, val := range j.Result {
 			for _, v := range val {
 				lockclear.Lock()
-				cfn := e.ClearFn[key]
+				var cfn = []string{}
+				if isSite {
+					cfn = e.SiteClearFn[key]
+
+				} else {
+					cfn = e.ClearFn[key]
+				}
 				lockclear.Unlock()
 				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
 				before, _ := v.Value.(string)
@@ -461,14 +504,14 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 				lockclear.Unlock()
 			}
 		}
-		PackageDetail(j, e) //处理分包信息
+		PackageDetail(j, e, isSite) //处理分包信息
 		//		bs, _ := json.Marshal(j.Result)
 		//		 log.Debug("抽取结果", j.Title, j.SourceMid, string(bs))
 	}, func(err interface{}) {
 		log.Debug("ExtractProcess err", err)
 	})
 }
-func (e *ExtractTask) ExtractFile(j *ju.Job) {
+func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool) {
 	qu.Try(func() {
 		doc := *j.Data
 		//全局前置规则,结果覆盖doc属性
@@ -547,7 +590,7 @@ func (e *ExtractTask) ExtractFile(j *ju.Job) {
 			}
 		}
 
-		PackageDetail(j, e) //处理分包信息
+		PackageDetail(j, e, isSite) //处理分包信息
 		//		bs, _ := json.Marshal(j.Result)
 		//		 log.Debug("抽取结果", j.Title, j.SourceMid, string(bs))
 	}, func(err interface{}) {
@@ -1522,7 +1565,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 {                            //reids未找到,执行规则匹配
+	if i == 0 { //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库

+ 121 - 38
src/jy/extract/extractInit.go

@@ -59,18 +59,22 @@ type TaskInfo struct {
 	TestLua                             bool      //检查测试用
 }
 type ExtractTask struct {
-	Id        string        //任务id
-	IsRun     bool          //是否启动
-	Content   string        //信息内容
-	TaskInfo  *TaskInfo     //任务信息
-	RulePres  []*RegLuaInfo //通用前置规则
-	RuleBacks []*RegLuaInfo //通用后置规则
-	RuleBlock *ju.RuleBlock
+	Id            string        //任务id
+	IsRun         bool          //是否启动
+	Content       string        //信息内容
+	TaskInfo      *TaskInfo     //任务信息
+	RulePres      []*RegLuaInfo //通用前置规则
+	RuleBacks     []*RegLuaInfo //通用后置规则
+	SiteRuleBacks []*RegLuaInfo //站点通用后置规则
+	RuleBlock     *ju.RuleBlock
 	//RuleCores      []*RuleCore         //抽取规则
 	RuleCores     map[string]map[string][]*RuleCore //分类抽取规则
+	SiteRuleCores map[string]map[string][]*RuleCore //站点分类抽取规则
 	PkgRuleCores  []*RuleCore                       //分包抽取规则
 	Tag           map[string][]*Tag                 //标签库
+	SiteTag       map[string][]*Tag                 //站点标签库
 	ClearFn       map[string][]string               //清理函数
+	SiteClearFn   map[string][]string               //站点清理函数
 	IsExtractCity bool                              //是否开启城市抽取
 	Fields        map[string]int                    //抽取属性组
 
@@ -82,12 +86,12 @@ type ExtractTask struct {
 	ResultArr [][]map[string]interface{} //抽取结果详情
 	BidChanel chan bool                  //抽取结果
 	BidArr    [][]map[string]interface{} //抽取结果
-	BidTotal  int                        //结果数量
+	BidTotal int                         //结果数量
 
 	RecogFieldMap map[string]map[string]interface{}   //识别字段
 	FidClassMap   map[string][]map[string]interface{} //分类
-	CidRuleMap    map[string][]map[string]interface{} //规则
-	AuditFields   []string                            //需要审核的字段名称
+	CidRuleMap map[string][]map[string]interface{}    //规则
+	AuditFields []string                              //需要审核的字段名称
 
 	SiteCityMap          map[string]*SiteCity //站点对应的省市区
 	ProvinceMap          map[string]string    //省全称简称(key:浙江省 val:浙江)
@@ -272,10 +276,17 @@ func (e *ExtractTask) InitRulePres() {
 }
 
 //加载通用后置规则
-func (e *ExtractTask) InitRuleBacks() {
+func (e *ExtractTask) InitRuleBacks(isSite bool) {
 	defer qu.Catch()
-	e.RuleBacks = []*RegLuaInfo{}
-	list, _ := db.Mgo.Find("rule_back", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
+	cDB := ""
+	if isSite {
+		cDB = "site_rule_back"
+		e.SiteRuleBacks = []*RegLuaInfo{}
+	} else {
+		cDB = "rule_back"
+		e.RuleBacks = []*RegLuaInfo{}
+	}
+	list, _ := db.Mgo.Find(cDB, `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
 	for _, v := range *list {
 		rinfo := &RegLuaInfo{
 			Code:  v["s_code"].(string),
@@ -284,7 +295,11 @@ func (e *ExtractTask) InitRuleBacks() {
 		}
 		if rinfo.IsLua {
 			rinfo.RuleText = v["s_luascript"].(string)
-			e.RuleBacks = append(e.RuleBacks, rinfo)
+			if isSite {
+				e.SiteRuleBacks = append(e.SiteRuleBacks, rinfo)
+			} else {
+				e.RuleBacks = append(e.RuleBacks, rinfo)
+			}
 		} else {
 			qu.Try(func() {
 				rinfo.RuleText = v["s_rule"].(string)
@@ -302,7 +317,11 @@ func (e *ExtractTask) InitRuleBacks() {
 				} else {
 					rinfo.RegPreBac = &ExtReg{Reg: regexp.MustCompile(pattern), Replace: ""}
 				}
-				e.RuleBacks = append(e.RuleBacks, rinfo)
+				if isSite {
+					e.SiteRuleBacks = append(e.SiteRuleBacks, rinfo)
+				} else {
+					e.RuleBacks = append(e.RuleBacks, rinfo)
+				}
 			}, func(err interface{}) {
 				log.Debug(rinfo.Code, rinfo.Field, err)
 			})
@@ -318,21 +337,36 @@ func (e *ExtractTask) InfoTypeList() {
 }
 
 //加载抽取规则
-func (e *ExtractTask) InitRuleCore() {
+func (e *ExtractTask) InitRuleCore(isSite bool) {
 	defer qu.Catch()
 	allFields := getALLFields()
 	e.Fields = map[string]int{}
-	e.RuleCores = make(map[string]map[string][]*RuleCore)
+	var versioninfodb, rule_logicdb, rule_logicpredb, rule_logicbackdb, rule_logicoredb string
+	if isSite {
+		versioninfodb = "site_versioninfo"
+		rule_logicdb = "site_rule_logic"
+		rule_logicpredb = "site_rule_logicpre"
+		rule_logicbackdb = "site_rule_logicback"
+		rule_logicoredb = "site_rule_logicore"
+		e.SiteRuleCores = make(map[string]map[string][]*RuleCore)
+	} else {
+		versioninfodb = "versioninfo"
+		rule_logicdb = "rule_logic"
+		rule_logicpredb = "rule_logicpre"
+		rule_logicbackdb = "rule_logicback"
+		rule_logicoredb = "rule_logicore"
+		e.RuleCores = make(map[string]map[string][]*RuleCore)
+	}
 
 	fieldrules := map[string][]*RuleCore{}
-	vinfos, _ := db.Mgo.Find("versioninfo", `{"vid":"`+e.TaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1)
+	vinfos, _ := db.Mgo.Find(versioninfodb, `{"vid":"`+e.TaskInfo.VersionId+`","delete":false}`, nil, nil, false, -1, -1)
 	for _, vinfo := range *vinfos {
 		if b, _ := vinfo["isuse"].(bool); !b {
 			continue
 		}
 		s_field := qu.ObjToString(vinfo["s_field"])
 		pid := qu.BsonIdToSId(vinfo["_id"])
-		list, _ := db.Mgo.Find("rule_logic", `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1)
+		list, _ := db.Mgo.Find(rule_logicdb, `{"pid":"`+pid+`","delete":false}`, nil, nil, false, -1, -1)
 		for _, vv := range *list {
 			if b, _ := vv["isuse"].(bool); !b {
 				continue
@@ -344,7 +378,7 @@ func (e *ExtractTask) InitRuleCore() {
 			rcore.LFields = allFields
 			//前置规则
 			rulePres := []*RegLuaInfo{}
-			plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
+			plist, _ := db.Mgo.Find(rule_logicpredb, `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
 			for _, v := range *plist {
 				rinfo := &RegLuaInfo{
 					Field: qu.ObjToString(v["s_field"]),
@@ -382,7 +416,7 @@ func (e *ExtractTask) InitRuleCore() {
 
 			//后置规则
 			ruleBacks := []*RegLuaInfo{}
-			blist, _ := db.Mgo.Find("rule_logicback", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
+			blist, _ := db.Mgo.Find(rule_logicbackdb, `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
 			for _, v := range *blist {
 				rinfo := &RegLuaInfo{
 					Field: qu.ObjToString(v["s_field"]),
@@ -420,7 +454,7 @@ func (e *ExtractTask) InitRuleCore() {
 
 			//抽取规则
 			ruleCores := []*RegLuaInfo{}
-			clist, _ := db.Mgo.Find("rule_logicore", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
+			clist, _ := db.Mgo.Find(rule_logicoredb, `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","delete":false}`, nil, nil, false, -1, -1)
 			for _, v := range *clist {
 				if b, _ := v["isuse"].(bool); !b {
 					continue
@@ -484,15 +518,31 @@ func (e *ExtractTask) InitRuleCore() {
 	for _, v := range *infolist {
 		topclass := qu.ObjToString(v["topclass"])
 		if v["subclass"] == nil {
-			e.RuleCores[topclass] = make(map[string][]*RuleCore)
+			if isSite {
+				e.SiteRuleCores[topclass] = make(map[string][]*RuleCore)
+			} else {
+				e.RuleCores[topclass] = make(map[string][]*RuleCore)
+			}
 			for attr, _ := range v["fields"].(map[string]interface{}) {
-				e.RuleCores[topclass][attr] = fieldrules[attr]
+				if isSite {
+					e.SiteRuleCores[topclass][attr] = fieldrules[attr]
+				} else {
+					e.RuleCores[topclass][attr] = fieldrules[attr]
+				}
 			}
 		} else {
 			for ca, fs := range v["subclass"].(map[string]interface{}) {
-				e.RuleCores[topclass+"_"+ca] = make(map[string][]*RuleCore)
+				if isSite {
+					e.SiteRuleCores[topclass+"_"+ca] = make(map[string][]*RuleCore)
+				} else {
+					e.RuleCores[topclass+"_"+ca] = make(map[string][]*RuleCore)
+				}
 				for field, _ := range fs.(map[string]interface{}) {
-					e.RuleCores[topclass+"_"+ca][field] = fieldrules[field]
+					if isSite {
+						e.SiteRuleCores[topclass+"_"+ca][field] = fieldrules[field]
+					} else {
+						e.RuleCores[topclass+"_"+ca][field] = fieldrules[field]
+					}
 				}
 			}
 		}
@@ -562,11 +612,18 @@ func (e *ExtractTask) InitPkgCore() {
 }
 
 //加载标签库
-func (e *ExtractTask) InitTag() {
+func (e *ExtractTask) InitTag(isSite bool) {
 	defer qu.Catch()
-	e.Tag = map[string][]*Tag{}
+	var tagdetailinfodb string
+	if isSite {
+		tagdetailinfodb = "site_tagdetailinfo"
+		e.SiteTag = map[string][]*Tag{}
+	} else {
+		tagdetailinfodb = "tagdetailinfo"
+		e.Tag = map[string][]*Tag{}
+	}
 	//字符串标签库
-	list, _ := db.Mgo.Find("tagdetailinfo", `{"s_type":"string","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
+	list, _ := db.Mgo.Find(tagdetailinfodb, `{"s_type":"string","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
 	for _, v := range *list {
 		field := qu.ObjToString(v["s_field"])
 		if tmp, ok := v["content"].([]interface{}); ok {
@@ -575,16 +632,24 @@ func (e *ExtractTask) InitTag() {
 			tab.Items = make([]*ju.Tag, len(tmp))
 			for k, key := range tmp {
 				tag := &Tag{Type: "string", Key: key.(string)}
-				e.Tag[field] = append(e.Tag[field], tag)
+				if isSite {
+					e.SiteTag[field] = append(e.SiteTag[field], tag)
+				} else {
+					e.Tag[field] = append(e.Tag[field], tag)
+				}
 				tab.Items[k] = &ju.Tag{"", key.(string), 0 - k, nil, false}
 			}
 			sort.Sort(tab.Items)
 			//ju.TagdbTable[fname] = &tab
-			ju.TagdbTable.Store(fname, &tab)
+			if isSite {
+				ju.SiteTagdbTable.Store(fname, &tab)
+			} else {
+				ju.TagdbTable.Store(fname, &tab)
+			}
 		}
 	}
 	//正则标签库
-	list, _ = db.Mgo.Find("tagdetailinfo", `{"s_type":"reg","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
+	list, _ = db.Mgo.Find(tagdetailinfodb, `{"s_type":"reg","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
 	for _, v := range *list {
 		field := qu.ObjToString(v["s_field"])
 		if tmp, ok := v["content"].([]interface{}); ok {
@@ -593,12 +658,20 @@ func (e *ExtractTask) InitTag() {
 			tab.Items = make([]*ju.Tag, len(tmp))
 			for k, key := range tmp {
 				tag := &Tag{Type: "regexp", Key: key.(string), Reg: regexp.MustCompile(key.(string))}
-				e.Tag[field] = append(e.Tag[field], tag)
+				if isSite {
+					e.SiteTag[field] = append(e.SiteTag[field], tag)
+				} else {
+					e.Tag[field] = append(e.Tag[field], tag)
+				}
 				tab.Items[k] = &ju.Tag{"", key.(string), 0 - k, regexp.MustCompile(key.(string)), false}
 			}
 			sort.Sort(tab.Items)
 			//ju.TagdbTable[fname+"_reg"] = &tab
-			ju.TagdbTable.Store(fname+"_reg", &tab)
+			if isSite {
+				ju.SiteTagdbTable.Store(fname+"_reg", &tab)
+			} else {
+				ju.TagdbTable.Store(fname+"_reg", &tab)
+			}
 		}
 	}
 }
@@ -614,9 +687,15 @@ func getALLFields() map[string]string {
 }
 
 //加载clear函数
-func (e *ExtractTask) InitClearFn() {
+func (e *ExtractTask) InitClearFn(isSite bool) {
 	defer qu.Catch()
-	list, _ := db.Mgo.Find("cleanup", `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
+	var cleanupdb string
+	if isSite {
+		cleanupdb = "site_cleanup"
+	} else {
+		cleanupdb = "cleanup"
+	}
+	list, _ := db.Mgo.Find(cleanupdb, `{"s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)
 	fn := map[string][]string{}
 	for _, tmp := range *list {
 		field := tmp["s_field"].(string)
@@ -628,7 +707,11 @@ func (e *ExtractTask) InitClearFn() {
 			fn[field] = append(fn[field], v.(string))
 		}
 	}
-	e.ClearFn = fn
+	if isSite {
+		e.SiteClearFn = fn
+	} else {
+		e.ClearFn = fn
+	}
 }
 
 //加载省份
@@ -1217,7 +1300,7 @@ func (e *ExtractTask) InitAuditRule() {
 func (e *ExtractTask) InitAuditFields() {
 	if len(e.AuditFields) == 0 {
 		v, _ := db.Mgo.FindOne("version", `{"isuse":true,"delete":false}`) //查找当前使用版本
-		if v != nil && len(*v) > 0 {                                       //查找当前使用版本中属性配置需要审核的字段
+		if v != nil && len(*v) > 0 { //查找当前使用版本中属性配置需要审核的字段
 			vid := qu.BsonIdToSId((*v)["_id"])
 			query := map[string]interface{}{
 				"isaudit": true,

+ 20 - 13
src/jy/extract/extractudp.go

@@ -100,11 +100,15 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 		ext.TaskInfo.FDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
 		ext.TaskInfo.TDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.ToDbAddr, ext.TaskInfo.ToDB)
 		ext.InitRulePres()
-		ext.InitRuleBacks()
-		ext.InitRuleCore()
+		ext.InitRuleBacks(false)
+		ext.InitRuleBacks(true)
+		ext.InitRuleCore(false)
+		ext.InitRuleCore(true)
 		ext.InitBlockRule()
-		ext.InitTag()
-		ext.InitClearFn()
+		ext.InitTag(false)
+		ext.InitTag(true)
+		ext.InitClearFn(false)
+		ext.InitClearFn(true)
 		if ext.IsExtractCity { //版本上控制是否开始城市抽取
 			//初始化城市DFA信息
 			//ext.InitCityDFA()
@@ -152,14 +156,15 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 					continue
 				}
 				var j, jf *ju.Job
+				var isSite bool
 				if ext.IsFileField && v["projectinfo"] != nil {
 					v["isextFile"] = true
-					j, jf = ext.PreInfo(v)
+					j, jf,isSite = ext.PreInfo(v)
 				} else {
-					j, _ = ext.PreInfo(v)
+					j, _,isSite = ext.PreInfo(v)
 				}
 				ext.TaskInfo.ProcessPool <- true
-				go ext.ExtractProcess(j, jf)
+				go ext.ExtractProcess(j, jf,isSite)
 				index++
 			}
 			list2, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl+"_back", query, nil, Fields, false, -1, -1)
@@ -168,14 +173,15 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 					continue
 				}
 				var j, jf *ju.Job
+				var isSite bool
 				if ext.IsFileField && v["projectinfo"] != nil {
 					v["isextFile"] = true
-					j, jf = ext.PreInfo(v)
+					j, jf,isSite = ext.PreInfo(v)
 				} else {
-					j, _ = ext.PreInfo(v)
+					j, _,isSite = ext.PreInfo(v)
 				}
 				ext.TaskInfo.ProcessPool <- true
-				go ext.ExtractProcess(j, jf)
+				go ext.ExtractProcess(j, jf,isSite)
 				index++
 			}
 			db.Mgo.UpdateById("esctask", (*tsk)["_id"], map[string]interface{}{
@@ -215,18 +221,19 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				}
 				_id := qu.BsonIdToSId(v["_id"])
 				var j, jf *ju.Job
+				var isSite bool
 				if ext.IsFileField && v["projectinfo"] != nil {
 					v["isextFile"] = true
-					j, jf = ext.PreInfo(v)
+					j, jf,isSite = ext.PreInfo(v)
 				} else {
-					j, _ = ext.PreInfo(v)
+					j, _,isSite = ext.PreInfo(v)
 				}
 				ext.TaskInfo.ProcessPool <- true
 				wg.Add(1)
 				go func(wg *sync.WaitGroup, j, jf *ju.Job) {
 					defer wg.Done()
 					//log.Debug(index,j.SourceMid,)
-					ext.ExtractProcess(j, jf)
+					ext.ExtractProcess(j, jf,isSite)
 				}(&wg, j, jf)
 				index++
 				if index%1000 == 0 {

+ 2 - 2
src/jy/pretreated/analykv.go

@@ -23,7 +23,7 @@ var matchkh = map[string]string{
 	"〖": "〗",
 }
 
-func GetKvFromtxt(con, tag string, from int) ([]*u.Kv, map[string][]*u.Tag) {
+func GetKvFromtxt(con, tag string, from int,isSite bool) ([]*u.Kv, map[string][]*u.Tag) {
 	res := FindKv(TextAfterRemoveTable(con), tag, from)
 	kvs := []*u.Kv{}
 	for _, k := range res.Keys {
@@ -35,7 +35,7 @@ func GetKvFromtxt(con, tag string, from int) ([]*u.Kv, map[string][]*u.Tag) {
 			})
 		}
 	}
-	kvTags := GetKvTags(kvs, tag, nil)
+	kvTags := GetKvTags(kvs, tag, nil,isSite)
 	return kvs, kvTags
 }
 

+ 19 - 19
src/jy/pretreated/analystep.go

@@ -12,7 +12,7 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-func AnalyStart(job *util.Job) {
+func AnalyStart(job *util.Job,isSite bool) {
 	con := job.Content
 	//全文的需要修复表格
 	con = RepairCon(con)
@@ -29,23 +29,23 @@ func AnalyStart(job *util.Job) {
 			ration = newration
 		}
 	}
-	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock) //分块
+	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock,isSite) //分块
 	if len(blockArrays) > 0 { //有分块
 		//从块里面找分包
-		job.BlockPackage = FindPackageFromBlocks(&blockArrays) //从块里面找分包
+		job.BlockPackage = FindPackageFromBlocks(&blockArrays,isSite) //从块里面找分包
 		for _, bl := range blockArrays {
 			//log.Println(bl.Text)
 			if len([]rune(bl.Text)) > 80 {
-				bl.Block, _ = DivideBlock(job.CategorySecond, bl.Text, 1, job.RuleBlock)
+				bl.Block, _ = DivideBlock(job.CategorySecond, bl.Text, 1, job.RuleBlock,isSite)
 				for _, bl_bl := range bl.Block {
-					processTableInBlock(bl_bl, job, false)
+					processTableInBlock(bl_bl, job, false,isSite)
 				}
 			}
 			FindProjectCode(bl.Text, job) //匹配项目编号
-			processTableInBlock(bl, job, true)
+			processTableInBlock(bl, job, true,isSite)
 			//新加 未分块table中未能解析到中标候选人,从正文中解析
 			if job.Winnerorder == nil || len(job.Winnerorder) == 0 {
-				bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1)
+				bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1,isSite)
 				job.Winnerorder = bl.Winnerorder
 			}
 			job.Block = append(job.Block, bl)
@@ -57,33 +57,33 @@ func AnalyStart(job *util.Job) {
 		if len(tabs) > 0 { //解析表格逻辑
 			job.HasTable = 1 //添加标识:文本中有table
 			newCon = TextAfterRemoveTable(con)
-			job.BlockPackage = FindPackageFromText(job.Title, newCon)
+			job.BlockPackage = FindPackageFromText(job.Title, newCon,isSite)
 			for i := 0; i < len(tabs); i++ {
 				//log.Println(tabs[i].Text())
 				//添加标识:文本中有table
-				tabres := AnalyTableV2(tabs[i], job.Category, "", con, 1, job.SourceMid, job.RuleBlock) //解析表格入口 返回:汇总表格对象
-				processTableResult(tabres, bl, job)
+				tabres := AnalyTableV2(tabs[i], job.Category, "", con, 1, job.SourceMid, job.RuleBlock,isSite) //解析表格入口 返回:汇总表格对象
+				processTableResult(tabres, bl, job,isSite)
 			}
 		} else {
 			//从正文里面找分包
-			job.BlockPackage = FindPackageFromText(job.Title, newCon)
+			job.BlockPackage = FindPackageFromText(job.Title, newCon,isSite)
 		}
 		bl.Text = HtmlToText(con)
 		//log.Println(bl.Text)
 		FindProjectCode(bl.Text, job) //匹配项目编号
 		//调用kv解析
-		bl.ColonKV = GetKVAll(bl.Text, "", nil, 1)
-		bl.SpaceKV = SspacekvEntity.Entrance(bl.Text, "", nil)
+		bl.ColonKV = GetKVAll(bl.Text, "", nil, 1,isSite)
+		bl.SpaceKV = SspacekvEntity.Entrance(bl.Text, "", nil,isSite)
 		//新加 未分块table中未能解析到中标候选人,从正文中解析
 		if job.Winnerorder == nil || len(job.Winnerorder) == 0 {
-			bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1)
+			bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1,isSite)
 			job.Winnerorder = bl.Winnerorder
 		}
 		job.Block = append(job.Block, bl)
 	}
 }
 
-func processTableInBlock(bl *util.Block, job *util.Job, packageFlag bool) {
+func processTableInBlock(bl *util.Block, job *util.Job, packageFlag,isSite bool) {
 	//块中再查找表格(块,处理完把值赋到块)
 	tabs, _ := ComputeConRatio(bl.Text, 2)
 	for _, tab := range tabs {
@@ -93,12 +93,12 @@ func processTableInBlock(bl *util.Block, job *util.Job, packageFlag bool) {
 			tmptag = strings.TrimSpace(tab.Nodes[0].PrevSibling.Data)
 		}
 		//添加标识:文本中有table
-		tabres := AnalyTableV2(tab, job.Category, tmptag, tab.Text(), 2, job.SourceMid, job.RuleBlock) //解析表格入口 返回:汇总表格对象
+		tabres := AnalyTableV2(tab, job.Category, tmptag, tab.Text(), 2, job.SourceMid, job.RuleBlock,isSite) //解析表格入口 返回:汇总表格对象
 		if packageFlag {
 			tabres.PackageMap = nil
 			tabres.IsMultiPackage = false
 		}
-		processTableResult(tabres, bl, job) //分析table解析结果
+		processTableResult(tabres, bl, job,isSite) //分析table解析结果
 		if bl.Title == "" && tabres.BlockTag != "" {
 			bl.Title = tabres.BlockTag
 		}
@@ -171,7 +171,7 @@ func FindProjectCode(newCon string, job *util.Job) {
 }
 
 //分析table解析结果
-func processTableResult(tabres *TableResult, block *util.Block, job *util.Job) {
+func processTableResult(tabres *TableResult, block *util.Block, job *util.Job,isSite bool) {
 	//解析结果中的kv
 	if block.TableKV == nil {
 		block.TableKV = util.NewJobKv()
@@ -203,7 +203,7 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job) {
 			} else {
 				blockPackage.TableKV = util.NewJobKv()
 			}
-			MergeKvTags(blockPackage.TableKV.KvTags, GetKvTags(labelKVs, "", nil))
+			MergeKvTags(blockPackage.TableKV.KvTags, GetKvTags(labelKVs, "", nil,isSite))
 			tablePackage[v] = blockPackage
 		}
 	}

+ 60 - 60
src/jy/pretreated/analytable.go

@@ -122,7 +122,7 @@ func IsHide(g *goquery.Selection) (b bool) {
 
 //对表格的key进行标准化处理,多个k相同时,出现覆盖问题
 //待扩展,暂不支持正则标签库
-func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (kvTags map[string][]*u.Tag, returntag string) {
+func CommonDataAnaly(k, tabletag, tabledesc string, v interface{},isSite bool) (kvTags map[string][]*u.Tag, returntag string) {
 	kvTags = map[string][]*u.Tag{}
 	v1 := ""
 	if sv, sok := v.(string); sok { //取KV
@@ -141,9 +141,9 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (kvTags map[s
 	k1 := ClearKey(k, 2)
 	//u.Debug(2, k)
 	//取标准key
-	res := u.GetTags(k1)
+	res := u.GetTags(k1,isSite)
 	if len(res) == 0 && k1 != k {
-		res = u.GetTags(k)
+		res = u.GetTags(k,isSite)
 		k1 = k
 	}
 	//log.Println(k, res)
@@ -202,7 +202,7 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{}) (kvTags map[s
 }
 
 //对解析后的表格的kv进行过滤
-func (table *Table) KVFilter() {
+func (table *Table) KVFilter(isSite bool) {
 	//1.标准化值查找
 	//2.对数组的处理
 	//3.对分包的处理
@@ -230,7 +230,7 @@ func (table *Table) KVFilter() {
 			if k == `中标价(万元)\费率(%)`{
 				k = "中标价(万元)"
 			}
-			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v) //对key标准化处理,没有找到会走中标
+			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v,isSite) //对key标准化处理,没有找到会走中标
 			//qutil.Debug(k, v, k1, w1, v1, tag, b)
 			if tag != "" && table.Tag == "" {
 				table.Tag = tag
@@ -242,7 +242,7 @@ func (table *Table) KVFilter() {
 		}
 	}
 	//处理值是数组的kv放入标准化kv中//处理table.SortKV.value为数组的情况
-	table.sortKVArr(as)
+	table.sortKVArr(as,isSite)
 	//
 	if len(table.WinnerOrder) > 0 || !table.BPackage {
 		winnerOrder := []map[string]interface{}{}
@@ -274,7 +274,7 @@ func (table *Table) KVFilter() {
 	L: //遍历每个td,查询中标人
 		for _, tr := range table.TRs {
 			for _, td := range tr.TDs {
-				winnerOrder = winnerOrderEntity.Find(td.Val, true, 3)
+				winnerOrder = winnerOrderEntity.Find(td.Val, true, 3,isSite)
 				if len(winnerOrder) > 0 {
 					break L
 				}
@@ -305,7 +305,7 @@ func (table *Table) KVFilter() {
 }
 
 //处理table.SortKV.value为数组的情况
-func (table *Table) sortKVArr(as *SortMap) {
+func (table *Table) sortKVArr(as *SortMap,isSite bool) {
 	winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签
 	if !winnertag {
 		winnertag = iswinnertabletag.MatchString(table.TableResult.BlockTag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
@@ -340,7 +340,7 @@ func (table *Table) sortKVArr(as *SortMap) {
 							res, _, _, _, repl := CheckCommon(k, "bidorder")
 							kv := ""
 							if !res {
-								kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), ""))
+								kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), ""),isSite)
 								if kt.Len() > 0 {
 									kv = kt[0].Value
 								}
@@ -444,7 +444,7 @@ func (table *Table) sortKVArr(as *SortMap) {
 					}
 				}
 			}
-			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v)
+			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v,isSite)
 			if tag != "" && table.Tag == "" {
 				table.Tag = tag
 			}
@@ -612,7 +612,7 @@ func (table *Table) MergerToTableresult() {
 解析表格入口
 返回:汇总表格对象
 **/
-func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock) (tabres *TableResult) {
+func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock,isSite bool) (tabres *TableResult) {
 	defer qutil.Catch()
 	//u.Debug(con)
 	if itype == 1 {
@@ -630,12 +630,12 @@ func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype
 	tabres.GoqueryTabs = tabs
 	//}
 	//解析表格集
-	tabres.Analy()
+	tabres.Analy(isSite)
 	return
 }
 
 //开始解析表格集
-func (ts *TableResult) Analy() {
+func (ts *TableResult) Analy(isSite bool) {
 	tabs := []*Table{}
 	contactFormat := &u.ContactFormat{
 		IndexMap: map[int]string{},
@@ -644,7 +644,7 @@ func (ts *TableResult) Analy() {
 	//for _, table := range ts.GoqueryTabs {
 	tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
 	//核心模块
-	tsw := tn.Analy(contactFormat)
+	tsw := tn.Analy(contactFormat,isSite)
 	for _, tab := range tsw {
 		if len(tab.TRs) > 0 {
 			tabs = append(tabs, tab)
@@ -742,23 +742,23 @@ func (ts *TableResult) Analy() {
 }
 
 // Analy parses this table and returns the tables produced by AnalyTables.
 // It selects the table's <tr> rows, builds the internal TR/TD structure via
 // createTabe, recomputes row/column spans, and then runs the overall analysis.
 // contactFormat is passed through to AnalyTables; isSite (added in this change)
 // is forwarded unchanged to createTabe and AnalyTables.
 // NOTE(review): isSite presumably selects the site-specific rule/tag set
 // further down the call chain; confirm against the callees.
-func (table *Table) Analy(contactFormat *u.ContactFormat) []*Table {
+func (table *Table) Analy(contactFormat *u.ContactFormat,isSite bool) []*Table {
 	// Find the <tr> elements under the table's tbody/thead/tfoot children;
 	// if there are none, fall back to <tr> elements that are direct children.
 	trs := table.Goquery.ChildrenFiltered("tbody,thead,tfoot").ChildrenFiltered("tr")
 	if trs.Size() == 0 {
 		trs = table.Goquery.ChildrenFiltered("tr")
 	}
 	// Walk the row nodes and initialise the table structure.
-	table.createTabe(trs)
+	table.createTabe(trs,isSite)
 	// Reset the row/column span information.
 	table.ComputeRowColSpan()
 	// Run the overall analysis on the populated table struct.
-	ts := table.AnalyTables(contactFormat)
+	ts := table.AnalyTables(contactFormat,isSite)
 	return ts
 }
 
 //遍历节点,初始化table 结构体
-func (table *Table) createTabe(trs *goquery.Selection) {
+func (table *Table) createTabe(trs *goquery.Selection,isSite bool) {
 	trs.Each(func(n int, sel *goquery.Selection) {
 		//隐藏行不处理
 		if IsHide(sel) {
@@ -775,7 +775,7 @@ func (table *Table) createTabe(trs *goquery.Selection) {
 				return
 			}
 			//进入每一个单元格
-			td := NewTD(selm, TR, table) //初始化td,kv处理,td中有table处理,td的方向
+			td := NewTD(selm, TR, table,isSite) //初始化td,kv处理,td中有table处理,td的方向
 			//num++
 			TR.AddTD(td)
 			if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0 { //删除一个tr,tr中所有td是空值的
@@ -793,7 +793,7 @@ func (table *Table) createTabe(trs *goquery.Selection) {
 }
 
 //对table进行整体解析处理
-func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
+func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool) []*Table {
 	ts := tn.tableSubDemolitionTable() //分包,拆表
 	for n, table := range ts {
 		//处理每个table
@@ -802,15 +802,15 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
 			table.deleteTrimTr()
 			//table.Print()
 			//校对表格
-			table.Adjust()
+			table.Adjust(isSite)
 			//查找表格的标签,table.Tag字段
 			table.FindTag()
 			//log.Println(table.TableResult.Id, table.Html)
 			//分割表格
-			table.bSplit(n, ts)
-			table.TdContactFormat(contactFormat) //contactFormat,处理采购单位,代理机构
+			table.bSplit(n, ts,isSite)
+			table.TdContactFormat(contactFormat,isSite) //contactFormat,处理采购单位,代理机构
 			//开始查找kv,核心模块,table.SortKV
-			table.FindKV()
+			table.FindKV(isSite)
 			//table中抽取品牌,table.BrandData
 			if u.IsBrandGoods {
 				table.analyBrand()
@@ -818,7 +818,7 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
 			res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
 			if !res {
 				//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
-				table.KVFilter()
+				table.KVFilter(isSite)
 			}
 			//对没有表头表格的处理
 			if table.Tag != "" {
@@ -848,7 +848,7 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat) []*Table {
 				}
 			}
 			//判断是否是多包,并处理分包的//遍历td分块
-			table.CheckMultiPackageByTable()
+			table.CheckMultiPackageByTable(isSite)
 			//MergeKvTags(table.TableResult.KvTags, table.StandKV)
 		}
 	}
@@ -913,7 +913,7 @@ func (table *Table) tableSubDemolitionTable() []*Table {
 }
 
 //分割表格
-func (table *Table) bSplit(n int, ts []*Table) {
+func (table *Table) bSplit(n int, ts []*Table,isSite bool) {
 	if table.BSplit {
 		if !table.BHeader && n > 0 {
 			for i := n - 1; i > -1; i-- {
@@ -921,7 +921,7 @@ func (table *Table) bSplit(n int, ts []*Table) {
 					if ts[i].BFirstRow {
 						//取第一行插入到
 						table.InsertTR(ts[i].TRs[0])
-						table.Adjust()
+						table.Adjust(isSite)
 					}
 					break
 				}
@@ -952,7 +952,7 @@ func (table *Table) deleteTrimTr() {
 }
 
 //校对表格
-func (table *Table) Adjust() {
+func (table *Table) Adjust(isSite bool) {
 	//计算行列起止位置,跨行跨列处理
 	table.ComputeRowColSpan()
 	//	for k1, tr := range table.TRs {
@@ -987,7 +987,7 @@ func (table *Table) Adjust() {
 	}
 	if float32(count)/float32(table.TDNum) < 0.85 {
 		//精确计算起止行列是表头的概率
-		table.ComputeRowColIsKeyRation()
+		table.ComputeRowColIsKeyRation(isSite)
 		bhead := false
 	L:
 		for i, tr := range table.TRs {
@@ -999,7 +999,7 @@ func (table *Table) Adjust() {
 						if res {
 							//删除此行
 							table.TRs = table.TRs[:len(table.TRs)-1]
-							table.Adjust()
+							table.Adjust(isSite)
 							return
 						}
 					}
@@ -1118,7 +1118,7 @@ func (table *Table) GetKeyRation() {
 }
 
 //计算行列是表头的概率调用GetKeyRation
-func (table *Table) ComputeRowColIsKeyRation() {
+func (table *Table) ComputeRowColIsKeyRation(isSite bool) {
 	//增加对跨行校正限止
 	//	u.Debug(table.Brule, table.ColNum, table.RowNum, table.TDNum)
 	bkeyfirstrow := false
@@ -1357,7 +1357,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
 				tr.TDs[0].BH = false
 				tr.TDs[0].KVDirect = 0
 				sv := FindKv(tr.TDs[0].Val, "", 2)
-				_, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", nil, 2)
+				_, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", nil, 2,isSite)
 				for k, v := range resm {
 					sv.AddKey(k, v)
 				}
@@ -1392,7 +1392,7 @@ func (table *Table) ComputeRowColIsKeyRation() {
 }
 
 //查找表格的kv,调用FindTdVal
-func (table *Table) FindKV() {
+func (table *Table) FindKV(isSite bool) {
 	//判断全是key的表格不再查找
 	if table.BHeader { //只要一个是key即为true
 		direct := If(table.BFirstRow, 2, 1).(int) //kv,2查找方向,向上查找
@@ -1468,7 +1468,7 @@ func (table *Table) FindKV() {
 			for n, r := range r1 {
 				if len([]rune(r)) < 60 { // 长度小于60才去分
 					//res1, _ := GetKVAll(r, "", nil)
-					res1, _ := colonkvEntity.entrance(r, "", nil, 2)
+					res1, _ := colonkvEntity.entrance(r, "", nil, 2,isSite)
 					if res1 != nil {
 						nmap[n] = res1
 						nmapkeys = append(nmapkeys, n)
@@ -1900,7 +1900,7 @@ func (tn *Table) GetTdByRCNo(row, col int) *TD {
 }
 
 //判断表格是否是分包
-func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
+func (tn *Table) CheckMultiPackageByTable(isSite bool) (b bool, index []string) {
 	pac := 0             //包的数量
 	val := 0             //分值
 	index = []string{}   //存储分包,使用tbale.SortKV的key和value使用正则等处理对值进行判断
@@ -1972,20 +1972,20 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 					tn.BlockPackage.AddKey(v, bp) //table子包数组
 				}
 			}
-			isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos) //多包处理,处理不同情况下的分包
+			isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos,isSite) //多包处理,处理不同情况下的分包
 		}
 	} else {
 		isGoonNext = true
 	}
 	if isGoonNext { //没有处理成数组的情况下,继续调用正文查找分包的方法
-		tn.isGoonNext()
+		tn.isGoonNext(isSite)
 	}
 	//查找分包中的中标人排序
 	if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 {
 		for _, v := range tn.BlockPackage.Keys {
 			vv, ok := tn.BlockPackage.Map[v].(*u.BlockPackage)
 			if ok && (vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0) {
-				vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2)
+				vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2,isSite)
 			}
 		}
 	}
@@ -1993,7 +1993,7 @@ func (tn *Table) CheckMultiPackageByTable() (b bool, index []string) {
 }
 
 //多包处理,处理不同情况下的分包
-func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int) (isGoonNext bool) {
+func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,isSite bool) (isGoonNext bool) {
 	if len(index) == 1 { //是一个的情况
 		if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 { //table带排序的KV值小于10并且小于10列和小于4行
 			beq := true
@@ -2034,7 +2034,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
 		if val, bvs := v1.([]string); bvs {
 			if len(val) <= len(index) { //table.SortKV.Map.value数组小于等于分包index
 				for k, v := range val {
-					tn.assemblePackage(k1, v, index[k]) //组装解析到的分包
+					tn.assemblePackage(k1, v, index[k],isSite) //组装解析到的分包
 				}
 			} else {
 				for sk1, sv2 := range index {
@@ -2052,12 +2052,12 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
 							}
 						}
 					}
-					tn.assemblePackage(k1, v, sv2)
+					tn.assemblePackage(k1, v, sv2,isSite)
 				}
 			}
 			//删除子包的kv
 			//u.Debug("----==1==-------", k1)
-			k1tags := u.GetTags(k1) //取得匹配
+			k1tags := u.GetTags(k1,isSite) //取得匹配
 			//if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") {
 			//	tn.SortKV.RemoveKey(k1)
 			//}
@@ -2067,7 +2067,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
 			}
 		} else if val, bvs := v1.(string); bvs && len(index) == 1 {
 			//删除子包的kv
-			kvTags, _ := CommonDataAnaly(k1, "", "", val)
+			kvTags, _ := CommonDataAnaly(k1, "", "", val,isSite)
 			for kvTag_k, kvTag_v := range kvTags {
 				hasValid := false
 				for _, kvTag_vv := range kvTag_v {
@@ -2081,7 +2081,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
 				}
 				if !(len(kvTags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(kvTag_k)) {
 					tn.SortKV.RemoveKey(k1)
-					tn.assemblePackage(k1, val, index[0])
+					tn.assemblePackage(k1, val, index[0],isSite)
 					//log.Println("remove", k1, val)
 				}
 			}
@@ -2093,7 +2093,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int)
 }
 
 //没有处理成数组的情况下,继续调用正文查找分包的方法
-func (tn *Table) isGoonNext() {
+func (tn *Table) isGoonNext(isSite bool) {
 	blockPackage := map[string]*u.BlockPackage{}
 	for _, k := range tn.SortKV.Keys {
 		if excludeKey.MatchString(k) || strings.Contains(k, "批复") {
@@ -2107,7 +2107,7 @@ func (tn *Table) isGoonNext() {
 		} else {
 			str += fmt.Sprintf("%s:%s\n", nk, v)
 		}
-		b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false) //分块之后分包
+		b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false,isSite) //分块之后分包
 		if b && len(blockPackage) > 0 {
 			tn.BPackage = true
 			for mk, mv := range blockPackage {
@@ -2314,13 +2314,13 @@ func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, inde
 }
 
 //组装解析到的分包,//key如果匹配到抽取关键词就添加到table.SortKV
-func (tn *Table) assemblePackage(k1, v1, key string) {
+func (tn *Table) assemblePackage(k1, v1, key string,isSite bool) {
 	bp := tn.BlockPackage.Map[key].(*u.BlockPackage)
 	if bp.TableKV == nil {
 		bp.TableKV = u.NewJobKv()
 	}
 	if v1 != "" {
-		kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1) //匹配抽取关键词
+		kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1,isSite) //匹配抽取关键词
 		for k3, v3 := range kvTags {
 			bp.TableKV.KvTags[k3] = append(bp.TableKV.KvTags[k3], v3...)
 		}
@@ -2477,7 +2477,7 @@ func replPkgConfusion(v1 string) string {
 }
 
 //对td中的值,进行再处理
-func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat) {
+func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat,isSite bool) {
 	//处理表格中的联系人信息
 	indexMap := contactFormat.IndexMap
 	matchMap := contactFormat.MatchMap
@@ -2549,7 +2549,7 @@ L:
 			//和|以?及|与|、多个词和在一起
 			jumpNextTd, thisTrHasMatch = tn.tdsMultipleWords(jumpNextTd, td, td_index, tr, thisTrHasMatch, indexMap)
 			//分块之后的kv
-			thisTdKvs := kvAfterDivideBlock("", td.Text, 3, tn.TableResult.RuleBlock)
+			thisTdKvs := kvAfterDivideBlock("", td.Text, 3, tn.TableResult.RuleBlock,isSite)
 			if len(thisTdKvs) == 0 {
 				thisTdKvs = tn.tdkv(td) //获取冒号kv
 			}
@@ -2577,7 +2577,7 @@ L:
 				//都为正序查询
 				if allAscFind && tdAscFind {
 					//都为正序查询处理
-					matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex)
+					matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex,isSite)
 				}
 				if iscontinue {
 					continue
@@ -2647,7 +2647,7 @@ L:
 					}
 					thisTrHasMatch = true
 					//modle
-					modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index)
+					modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index,isSite)
 				}
 			}
 			//u.Debug(td.SortKV.Map)
@@ -2675,7 +2675,7 @@ L:
 }
 
 //modle
-func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int) {
+func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int,isSite bool) {
 	modle := 0
 	if len(thisTdKvs) == 1 {
 		if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
@@ -2690,7 +2690,7 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
 	} else {
 		//
 		if !strings.HasSuffix(td_k, "方式") {
-			kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts)
+			kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts,isSite)
 			if len(kvTags) == 1 {
 				tagVal, _ := u.FirstKeyValueInMap(kvTags)
 				if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
@@ -2717,7 +2717,7 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
 }
 
 //都为正序查询
-func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
+func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int,isSite bool) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
 	for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
 		if !ContactType[k].MatchString(td_k) { //没有匹配到采购单位,代理机构
 			continue
@@ -2728,9 +2728,9 @@ func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[s
 		}
 		//匹配到进行处理
 		if ContactInfoVagueReg.MatchString(td_k) {
-			thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch)
+			thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch,isSite)
 		} else if k == "采购单位" { //打标签,权重高的重新覆盖
-			kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"})
+			kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"},isSite)
 			tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
 			if tagVal == k {
 				if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
@@ -2781,13 +2781,13 @@ func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[s
 }
 
 //匹配到进行处理
-func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool) bool {
+func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool,isSite bool) bool {
 	if (*matchMap)[k] == nil {
 		(*matchMap)[k] = map[string]bool{}
 	}
 	isAddToMatchMap := true
 	if !strings.HasSuffix(td_k, "方式") {
-		kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts)
+		kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts,isSite)
 		if len(kvTags) == 1 {
 			tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
 			if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {

+ 18 - 18
src/jy/pretreated/colonkv.go

@@ -67,10 +67,10 @@ func (ce *ColonkvEntity) divisionMoreKV(con string) string {
 }
 
 //获取冒号kv入口
-func (ce *ColonkvEntity) entrance(con, title string, contactFormat *ContactFormat, from int) ([]*Kv, map[string]string) {
+func (ce *ColonkvEntity) entrance(con, title string, contactFormat *ContactFormat, from int,isSite bool) ([]*Kv, map[string]string) {
 	kvs := ce.GetKvs(con, title, from)
 	if from == 1 {
-		FormatContactKv(&kvs, title, nil, contactFormat)
+		FormatContactKv(&kvs, title, nil, contactFormat,isSite)
 	}
 	kv := map[string]string{}
 	for _, v := range kvs {
@@ -163,14 +163,14 @@ func (ce *ColonkvEntity) getColonKv(con, title string, from int) []*Kv {
 }
 
 //冒号kv和空格kv结合
-func (ce *ColonkvEntity) getColonSpaceKV(con string) []*Kv {
+func (ce *ColonkvEntity) getColonSpaceKV(con string,isSite bool) []*Kv {
 	con = colonkvEntity.processText(con)
 	lines := SspacekvEntity.getLines(con)
 	kvMaps := []*Kv{}
 	for _, line := range lines {
 		kvs := colonkvEntity.getColonKv(line, "", 1)
 		if len(kvs) == 0 {
-			kv := SspacekvEntity.divideKV(line)
+			kv := SspacekvEntity.divideKV(line,isSite)
 			if kv != nil {
 				kvMaps = append(kvMaps, kv...)
 			}
@@ -276,7 +276,7 @@ func IsContactKvHandle(value string, m map[string]bool) bool {
 
 //kv关于联系人信息的处理
 //采购人>集中采购机构
-func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *ContactFormat) {
+func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *ContactFormat,isSite bool) {
 	////////////////////////////
 	//处理联系人信息
 	var indexMap map[int]string
@@ -436,7 +436,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 						matchMap[ct_k] = map[string]bool{}
 					}
 					if !strings.HasSuffix(k, "方式") {
-						kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", BuyerContacts)
+						kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", BuyerContacts,isSite)
 						if len(kvTags) == 1 {
 							tagVal, weightVal := FirstKeyValueInMap(kvTags)
 							if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(v) {
@@ -468,7 +468,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 					}
 				}
 				if ct_k == "采购单位" { //打标签,权重高的重新覆盖
-					kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", []string{"采购单位"})
+					kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", []string{"采购单位"},isSite)
 					tagVal, weightVal := FirstKeyValueInMap(kvTags)
 					if tagVal == ct_k {
 						if weightMap[ct_k][ct_k] == nil || (weightVal != nil && weightVal.(int) > weightMap[ct_k][ct_k].(int)) {
@@ -595,7 +595,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 		allMatchCount++
 		delete(totalIndexMap, myContactType)
 		if !strings.HasSuffix(k, "方式") {
-			kvTags := GetKvTags([]*Kv{&Kv{Key: myContactType + k, Value: v}}, "", BuyerContacts)
+			kvTags := GetKvTags([]*Kv{&Kv{Key: myContactType + k, Value: v}}, "", BuyerContacts,isSite)
 			if len(kvTags) == 1 {
 				tagVal, _ := FirstKeyValueInMap(kvTags)
 				if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(v) {
@@ -707,20 +707,20 @@ func HasOrderContactType(text string) []string {
 
 //两种冒号kv结合到一起
 //from 1--全文 2--table td 3--table td解析采购单位联系人 4--分包
-func GetKVAll(content, title string, contactFormat *ContactFormat, from int) *JobKv {
+func GetKVAll(content, title string, contactFormat *ContactFormat, from int,isSite bool) *JobKv {
 	content = formatText(content, "kv")
-	m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from)
+	m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from,isSite)
 	//	for _, kvs := range m1Kvs {
 	//		qutil.Debug(kvs.Key, kvs.Value)
 	//	}
-	kvTags := GetKvTags(m1Kvs, title, nil)
+	kvTags := GetKvTags(m1Kvs, title, nil,isSite)
 	//	for k, kvs := range kvTags {
 	//		qutil.Debug("kkkkk--", k)
 	//		for _, kv := range kvs {
 	//			qutil.Debug(kv.Key, kv.Value)
 	//		}
 	//	}
-	m2Kvs, m2KvTags := GetKvFromtxt(content, title, from)
+	m2Kvs, m2KvTags := GetKvFromtxt(content, title, from,isSite)
 	//	for k, kvs := range m2KvTags {
 	//		qutil.Debug("kkkkk--", k)
 	//		for _, kv := range kvs {
@@ -774,7 +774,7 @@ func PrintKvTags(kvTags map[string][]*Tag) {
 }
 
 //KVTags转kv
-func GetKvTags(findkvs []*Kv, title string, tagdbs []string) map[string][]*Tag {
+func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool) map[string][]*Tag {
 	kvTags := map[string][]*Tag{}
 	if title != "" && BlockTagMap[title] {
 		kvTags[title] = append(kvTags[title], &Tag{title, title, 0, nil, false})
@@ -792,17 +792,17 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string) map[string][]*Tag {
 		}
 		key = colonkvEntity.blockTitleKV(title, key)
 		//先用新的key
-		tags := GetAppointTags(key, tagdbs) //找标签库
+		tags := GetAppointTags(key, tagdbs,isSite) //找标签库
 		if len(tags) == 0 && len(key) < 10 && len(title) > 0 && len(title) < 15 {
 			key = title + key
-			tags = GetAppointTags(key, tagdbs)
+			tags = GetAppointTags(key, tagdbs,isSite)
 		}
 		//再用老的key
 		if len(tags) == 0 && k != key {
-			tags = GetAppointTags(k, tagdbs)
+			tags = GetAppointTags(k, tagdbs,isSite)
 			if len(tags) == 0 && len(k) < 10 && len(title) > 0 && len(title) < 15 {
 				k = title + k
-				tags = GetAppointTags(k, tagdbs)
+				tags = GetAppointTags(k, tagdbs,isSite)
 				if len(tags) > 0 {
 					key = k
 				}
@@ -824,7 +824,7 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string) map[string][]*Tag {
 							if strings.TrimSpace(nextval) == "" {
 								continue
 							}
-							if GetAppointTags(nextval, tagdbs).Len() > 0 || GetAppointTags(k, tagdbs).Len() > 0 {
+							if GetAppointTags(nextval, tagdbs,isSite).Len() > 0 || GetAppointTags(k, tagdbs,isSite).Len() > 0 {
 								continue
 							}
 						}

+ 17 - 17
src/jy/pretreated/division.go

@@ -87,7 +87,7 @@ var (
 )
 
 //分块
-func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock) ([]*util.Block, int) {
+func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock,isSite bool) ([]*util.Block, int) {
 	defer qutil.Catch()
 	returnValue := 0
 	var blocks []*util.Block
@@ -299,8 +299,8 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock) ([]*ut
 	for _, bl := range returnBlocks {
 		//解析kv
 		newText := TextAfterRemoveTable(bl.Text)
-		bl.ColonKV = GetKVAll(newText, bl.Title, contactFormat, from)
-		bl.SpaceKV = SspacekvEntity.Entrance(newText, bl.Title, contactFormat)
+		bl.ColonKV = GetKVAll(newText, bl.Title, contactFormat, from,isSite)
+		bl.SpaceKV = SspacekvEntity.Entrance(newText, bl.Title, contactFormat,isSite)
 		//正则抽取的时候有时需要匹配换行或者句号,这里在解析完kv之后,在块结尾添加换行和句号
 		bl.Text = appendWarpStop(bl.Text)
 	}
@@ -543,7 +543,7 @@ func filterTitle(title string) string {
 }
 
 //从块里面找分包
-func FindPackageFromBlocks(blocks *[]*util.Block) (blockPackage map[string]*util.BlockPackage) {
+func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool) (blockPackage map[string]*util.BlockPackage) {
 	blockPackage = map[string]*util.BlockPackage{}
 	//块分包
 	for _, v := range *blocks {
@@ -554,7 +554,7 @@ func FindPackageFromBlocks(blocks *[]*util.Block) (blockPackage map[string]*util
 		}
 		//var ok bool
 		//var surplusText string
-			divisionPackageChild(&blockPackage, text, v.Title, true, v.Tag["中标单位"])
+			divisionPackageChild(&blockPackage, text, v.Title, true, v.Tag["中标单位"],isSite)
 		////把分包内容摘除掉有问题 有的项目名称中包含二标段
 		//if ok && false {
 		//	v.Text = surplusText
@@ -566,15 +566,15 @@ func FindPackageFromBlocks(blocks *[]*util.Block) (blockPackage map[string]*util
 }
 
 //从正文里面找分包
-func FindPackageFromText(title string, content string) (blockPackage map[string]*util.BlockPackage) {
+func FindPackageFromText(title string, content string,isSite bool) (blockPackage map[string]*util.BlockPackage) {
 	blockPackage = map[string]*util.BlockPackage{}
 	//从正文里面找分包
-	divisionPackageChild(&blockPackage, content, title, true, false)
+	divisionPackageChild(&blockPackage, content, title, true, false,isSite)
 	return
 }
 
 //分块之后分包
-func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool) (bool, string) {
+func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool,isSite bool) (bool, string) {
 	//查找知否有分包
 	content = regMoreWrap.ReplaceAllString(content, "\n")
 	content = regEndWrap.ReplaceAllString(content, "")
@@ -708,14 +708,14 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 				//合并文本
 				(*blockPackage)[index].Text += "\n" + text
 				//合并冒号kv
-				colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1)
+				colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1,isSite)
 				if headKey != "" {
-					kvAgain := GetKVAll(text, "", nil, 4)
+					kvAgain := GetKVAll(text, "", nil, 4,isSite)
 					MergeKvTags(colonJobKv.KvTags, kvAgain.KvTags)
 				}
 				MergeKvTags((*blockPackage)[index].ColonKV.KvTags, colonJobKv.KvTags)
 				//合并空格kv
-				spaceJobKv := SspacekvEntity.Entrance(text, "", nil)
+				spaceJobKv := SspacekvEntity.Entrance(text, "", nil,isSite)
 				MergeKvTags((*blockPackage)[index].SpaceKV.KvTags, spaceJobKv.KvTags)
 			} else {
 				newBpkg := &util.BlockPackage{
@@ -725,13 +725,13 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 					Type:     bv[1],
 					Accuracy: accuracy,
 				}
-				finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4)
+				finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4,isSite)
 				if headKey != "" {
-					kvAgain := GetKVAll(text, "", nil, 4)
+					kvAgain := GetKVAll(text, "", nil, 4,isSite)
 					MergeKvTags(finalKv.KvTags, kvAgain.KvTags)
 				}
 				newBpkg.ColonKV = finalKv
-				newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil)
+				newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil,isSite)
 				(*blockPackage)[index] = newBpkg
 			}
 		}
@@ -739,7 +739,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 	//中标人排序
 	if isFindWinnerOrder && blockPackage != nil && len(*blockPackage) > 0 {
 		for _, v := range *blockPackage {
-			v.WinnerOrder = winnerOrderEntity.Find(v.Text, true, 2)
+			v.WinnerOrder = winnerOrderEntity.Find(v.Text, true, 2,isSite)
 		}
 	}
 	return true, surplusText
@@ -852,8 +852,8 @@ func interceptText(indexs []int, indexPkgMap map[int]string, pkgIndexMap map[str
 }
 
 //分块之后的kv
-func kvAfterDivideBlock(tp, text string, from int, ruleBlock *util.RuleBlock) []*util.Kv {
-	blocks, _ := DivideBlock(tp, text, from, ruleBlock)
+func kvAfterDivideBlock(tp, text string, from int, ruleBlock *util.RuleBlock,isSite bool) []*util.Kv {
+	blocks, _ := DivideBlock(tp, text, from, ruleBlock,isSite)
 	kvs := []*util.Kv{}
 	for _, v := range blocks {
 		//util.Debug(v.Text)

+ 6 - 6
src/jy/pretreated/spacekv.go

@@ -16,18 +16,18 @@ var (
 	excludeSpaceKey = regexp.MustCompile("[.、�\\[【{{〔<《\\]】}}〕>》]")
 )
 
-func (se *SpacekvEntity) Entrance(text, title string, contactFormat *util.ContactFormat) *util.JobKv {
+func (se *SpacekvEntity) Entrance(text, title string, contactFormat *util.ContactFormat,isSite bool) *util.JobKv {
 	lines := se.getLines(text)
 	kvMaps := []*util.Kv{}
 	for _, line := range lines {
-		kvMap := se.divideKV(line)
+		kvMap := se.divideKV(line,isSite)
 		if kvMap == nil {
 			continue
 		}
 		kvMaps = append(kvMaps, kvMap...)
 	}
-	FormatContactKv(&kvMaps, title, nil, contactFormat)
-	kvTags := GetKvTags(kvMaps, title, nil)
+	FormatContactKv(&kvMaps, title, nil, contactFormat,isSite)
+	kvTags := GetKvTags(kvMaps, title, nil,isSite)
 	return &util.JobKv{
 		Kvs:    kvMaps,
 		KvTags: kvTags,
@@ -35,7 +35,7 @@ func (se *SpacekvEntity) Entrance(text, title string, contactFormat *util.Contac
 }
 
 //空格分kv
-func (se *SpacekvEntity) divideKV(line string) []*util.Kv {
+func (se *SpacekvEntity) divideKV(line string,isSite bool) []*util.Kv {
 	line = strings.TrimSpace(line)
 	line = regReplAllSpace.ReplaceAllString(line, " ")
 	line = TimeHM.ReplaceAllString(line, "D$1H$2M")
@@ -56,7 +56,7 @@ func (se *SpacekvEntity) divideKV(line string) []*util.Kv {
 			continue
 		}
 		//value为key值跳过
-		if util.GetTags(v).Len() > 0 && util.GetTags(k).Len() > 0{
+		if util.GetTags(v,isSite).Len() > 0 && util.GetTags(k,isSite).Len() > 0{
 			continue
 		}
 		kvs = append(kvs, &util.Kv{Key: k, Value: v})

+ 12 - 12
src/jy/pretreated/tablev2.go

@@ -87,7 +87,7 @@ var submatchreg = regexp.MustCompile(`((?:[一二三四五六七八九十0-10]+[
 var BHKey = regexp.MustCompile(`^[^,,;:。、.]{2,8}.{0,3}[::].+$`)
 var dwReg = regexp.MustCompile("单位[::/ \\s\u3000\u2003\u00a0\\n]*([万亿元]+)")
 
-func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
+func NewTD(Goquery *goquery.Selection, tr *TR, table *Table,isSite bool) *TD {
 	defer qutil.Catch()
 	td := &TD{
 		ArrVal:  []string{},
@@ -121,7 +121,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 		//qutil.Debug("有子表格")
 		//格式化正文
 		txt = TextAfterRemoveTable(td.Html)
-		td.tdHasTable(&bsontable, tr) //处理td中的table,块标签处理,子表解析集处理
+		td.tdHasTable(&bsontable, tr,isSite) //处理td中的table,块标签处理,子表解析集处理
 	} else {
 		txt = strings.TrimSpace(td.Goquery.Text())
 	}
@@ -130,7 +130,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 	td.Text = txt //原始串
 	//处理table外内容
 	var ub []*u.Block
-	ub, _ = DivideBlock("", txt, 2, table.TableResult.RuleBlock)
+	ub, _ = DivideBlock("", txt, 2, table.TableResult.RuleBlock,isSite)
 	//看是否划块
 	if len(ub) > 0 {
 		for _, bl := range ub {
@@ -175,7 +175,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 		}
 	}
 	//对td单元格值判断是否是表头和根据td内容长度进行分块处理
-	td.tdIsHb(tr, table, bsontable)
+	td.tdIsHb(tr, table, bsontable,isSite)
 	bhead := false
 	if td.TR.RowPos == 0 { //第一行
 		if td.Goquery.Closest("thead").Size() == 1 && !bsontable { //如果是thead确定为k值表头
@@ -192,7 +192,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table) *TD {
 }
 
 //处理td中的table,块标签处理,子表解析集处理
-func (td *TD) tdHasTable(bsontable *bool, tr *TR) {
+func (td *TD) tdHasTable(bsontable *bool, tr *TR,isSite bool) {
 	ts := td.TR.Table.TableResult
 	tabs, _ := ComputeConRatio(td.Html, 2) //计算表格占比
 	if len(tabs) > 0 {
@@ -225,7 +225,7 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR) {
 			}
 			sonts := NewTableResult(ts.Id, ts.Toptype, stag, td.Html, 2, td.TR.Table.TableResult.RuleBlock)
 			sonts.GoqueryTabs = tv
-			sonts.Analy()
+			sonts.Analy(isSite)
 
 			//sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock) //又一次调用解析表格入口
 			td.BH = false
@@ -300,7 +300,7 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR) {
 }
 
 //对td单元格值判断是否是表头和根据td内容长度进行分块处理
-func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
+func (td *TD) tdIsHb(tr *TR, table *Table, bsontable,isSite bool) {
 	lenval := len([]rune(td.Val)) //经过处理的td内容长度
 	//if lentxt > 9 {
 	//td.KV = GetKVAll(txt, "")
@@ -308,7 +308,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 	//经过处理的td内容长度大于50,划块,分包
 	if lenval > 50 { //看是否划块
 		//u.Debug(txt)
-		ub, _ = DivideBlock("", td.Text, 2, table.TableResult.RuleBlock) //对td的原始值
+		ub, _ = DivideBlock("", td.Text, 2, table.TableResult.RuleBlock,isSite) //对td的原始值
 		//看是否划块
 		if len(ub) > 0 {
 			for _, bl := range ub {
@@ -341,9 +341,9 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 		}
 		if isFindPkg {
 			if len(ub) > 0 {
-				blockPackage = FindPackageFromBlocks(&ub) //从块里面找分包
+				blockPackage = FindPackageFromBlocks(&ub,isSite) //从块里面找分包
 			} else {
-				blockPackage = FindPackageFromText("", td.Val) //从正文里面找分包
+				blockPackage = FindPackageFromText("", td.Val,isSite) //从正文里面找分包
 			}
 		}
 		if len(blockPackage) > 0 {
@@ -384,7 +384,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 			td.SortKV.AddKey(strings.TrimSpace(td.Text[:tagindex]), strings.TrimSpace(td.Text[tagindex:])) //存放kv值
 			td.BH = true
 		}
-		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 3) //td冒号kv
+		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 3,isSite) //td冒号kv
 		for k, v := range resm {
 			if k != "" && v != "" {
 				td.SortKV.AddKey(k, v) //存放kv值
@@ -445,7 +445,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable bool) {
 		if len(td.TR.TDs) > 0 {
 			kvTitle = td.TR.TDs[len(td.TR.TDs)-1].Val
 		}
-		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 2) //获取冒号kv入口
+		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 2,isSite) //获取冒号kv入口
 		for k, v := range resm {
 			td.SortKV.AddKey(k, v)
 		}

+ 8 - 8
src/jy/pretreated/winnerorder.go

@@ -51,7 +51,7 @@ var (
  *text文本,flag非否精确查找
  *from 来源
  */
-func (wo *WinnerOrderEntity) Find(text string, flag bool, from int) []map[string]interface{} {
+func (wo *WinnerOrderEntity) Find(text string, flag bool, from int,isSite bool) []map[string]interface{} {
 	text = winnerReg5.ReplaceAllString(text, "\n$3:$1\n")
 	/*
 		"_id" : ObjectId("5c2c6f60a5cb26b9b7b62cd8")
@@ -70,13 +70,13 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int) []map[string
 	if len(blocks) == 0 {
 		blocks = append(blocks, text)
 	}
-	winners := wo.findByReg(text, blocks, winnerReg1, from)
+	winners := wo.findByReg(text, blocks, winnerReg1, from,isSite)
 	if len(winners) == 0 {
-		winners = wo.findByReg(text, blocks, winnerReg2, from)
+		winners = wo.findByReg(text, blocks, winnerReg2, from,isSite)
 	}
 	if len(winners) == 0 {
 		if flag {
-			winners = wo.findByReg(text, blocks, winnerReg3, from)
+			winners = wo.findByReg(text, blocks, winnerReg3, from,isSite)
 		} else {
 			indexs_4 := winnerReg4.Split(text, -1)
 			if len(indexs_4) > 1 {
@@ -87,7 +87,7 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int) []map[string
 					}
 					for _, v_3 := range indexs_3 {
 						if strings.Count(v_4[:v_3[1]], "\n") <= 3 {
-							winners = wo.findByReg(text, blocks, winnerReg3, from)
+							winners = wo.findByReg(text, blocks, winnerReg3, from,isSite)
 							break
 						}
 					}
@@ -206,7 +206,7 @@ func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp
 }
 
 //抽取对应的排序结果
-func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *regexp.Regexp, from int) []map[string]interface{} {
+func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *regexp.Regexp, from int,isSite bool) []map[string]interface{} {
 	text := wo.getText(content, blocks, reg_2, from)
 	winners := []map[string]interface{}{}
 	if len(text) < 1 {
@@ -215,7 +215,7 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 	for i, v := range text {
 		object := map[string]interface{}{}
 		count := 0
-		kvs := colonkvEntity.getColonSpaceKV(v)
+		kvs := colonkvEntity.getColonSpaceKV(v,isSite)
 		for _, kv := range kvs {
 			k, v := kv.Key, kv.Value
 			if regDivision.MatchString(v) {
@@ -242,7 +242,7 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 				if offerReg.MatchString(k) {
 					findOfferFlag = true
 				} else {
-					kvTags := GetKvTags([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"})
+					kvTags := GetKvTags([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"},isSite)
 					if len(kvTags["中标金额"]) > 0 {
 						findOfferFlag = true
 					}

+ 27 - 12
src/jy/util/tagmatch.go

@@ -43,6 +43,7 @@ func (s Tags) Less(i, j int) bool {
 //
 //var TagdbTable = make(map[string]*TagFile)
 var TagdbTable = sync.Map{}
+var SiteTagdbTable = sync.Map{}
 var blocktagdb = make(map[string]*TagFile)
 
 //加载
@@ -126,12 +127,12 @@ func LoadTagDb(respath string) {
 }
 
 //取得匹配
-func GetTags(src string) Tags {
-	return GetAppointTags(src, nil)
+func GetTags(src string,isSite bool) Tags {
+	return GetAppointTags(src, nil,isSite)
 }
 
 //根据指定的标签库取得匹配
-func GetAppointTags(src string, array []string) Tags {
+func GetAppointTags(src string, array []string,isSite bool) Tags {
 	src = TrimLRAll(src, "")
 	ret := make(Tags, 0)
 	m := map[string]bool{}
@@ -150,17 +151,31 @@ func GetAppointTags(src string, array []string) Tags {
 	//	}
 	//}
 	//lock.Unlock()
-	TagdbTable.Range(func(key, value interface{}) bool {
-		if len(m) > 0 && !m[fmt.Sprint(key)] {
+	if isSite{
+		SiteTagdbTable.Range(func(key, value interface{}) bool {
+			if len(m) > 0 && !m[fmt.Sprint(key)] {
+				return true
+			}
+			if v,ok := value.(*TagFile);ok {
+				if ok, tag := v.Match(src); ok {
+					ret = append(ret, &Tag{src, v.Name, tag.Weight, tag.TagReg, false})
+				}
+			}
 			return true
-		}
-		if v,ok := value.(*TagFile);ok {
-			if ok, tag := v.Match(src); ok {
-				ret = append(ret, &Tag{src, v.Name, tag.Weight, tag.TagReg, false})
+		})
+	}else {
+		TagdbTable.Range(func(key, value interface{}) bool {
+			if len(m) > 0 && !m[fmt.Sprint(key)] {
+				return true
 			}
-		}
-		return true
-	})
+			if v,ok := value.(*TagFile);ok {
+				if ok, tag := v.Match(src); ok {
+					ret = append(ret, &Tag{src, v.Name, tag.Weight, tag.TagReg, false})
+				}
+			}
+			return true
+		})
+	}
 	//sort.Sort(ret)
 	return ret
 }

+ 12 - 0
src/main.go

@@ -1,12 +1,14 @@
 package main
 
 import (
+	"gopkg.in/mgo.v2/bson"
 	_ "jy/admin"
 	_ "jy/admin/audit"
 	_ "jy/admin/distribution"
 	_ "jy/admin/task"
 	"jy/extract"
 	_ "jy/front"
+	"jy/mongodbutil"
 	. "jy/router"
 	"jy/util"
 	qu "qfw/util"
@@ -34,6 +36,16 @@ func init() {
 	//初始化elastic连接
 	//"winner=172.17.145.179:2710,buyer=172.17.145.179:2711"
 	//elastic.InitElasticSize(qu.ObjToString(util.Config["elasticsearch"]), qu.IntAllDef(util.Config["elasticPoolSize"], 30))
+	sites,_:=mongodbutil.Mgo.Find("site_management",bson.M{},nil,bson.M{"site_script":1},false,-1,-1)
+	for _,v := range *sites{
+		if vv,ok:= v["site_script"].([]interface{});ok{
+			for _,vv := range vv{
+				extract.Luacodes.Store(vv,true)
+			}
+		}else if vv,ok:= v["site_script"].(interface{});ok{
+			extract.Luacodes.Store(vv,true)
+		}
+	}
 }
 
 func main() {

+ 9 - 5
src/main_blocktest.go

@@ -78,11 +78,15 @@ func com(doc map[string]interface{}) {
 	//d.TaskInfo.FDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
 	//d.TaskInfo.TDB = db.MgoFactory(3, 5, 600, ext.TaskInfo.ToDbAddr, ext.TaskInfo.ToDB)
 	e.InitRulePres()
-	e.InitRuleBacks()
-	e.InitRuleCore()
+	e.InitRuleBacks(false)
+	e.InitRuleBacks(true)
+	e.InitRuleCore(false)
+	e.InitRuleCore(true)
 	e.InitBlockRule()
-	e.InitTag()
-	e.InitClearFn()
+	e.InitTag(false)
+	e.InitTag(true)
+	e.InitClearFn(false)
+	e.InitClearFn(true)
 	if e.IsExtractCity { //版本上控制是否开始城市抽取
 		//初始化城市DFA信息
 		e.InitCityDFA()
@@ -115,7 +119,7 @@ func com(doc map[string]interface{}) {
 		RuleBlock: e.RuleBlock,
 	}
 	e.TaskInfo.ProcessPool <- true
-	pretreated.AnalyStart(j)
+	pretreated.AnalyStart(j,isSite)
 	e.ExtractProcess(j, nil)
 
 	log.Println("=============块信息================")

+ 2 - 2
src/web/templates/admin/site_rule_logicore.html

@@ -154,8 +154,8 @@ $(function () {
 			case "newlua":
 			case "new":
 				comtag=[{label:"名称",s_label:"s_name",placeholder:"",must:true},{label:"描述",s_label:"s_descript",type:"tpl_text"},{label:"启用",s_label:"isuse",type:"tpl_list_local",list:[{"s_name":"是","_id":true},{"s_name":"否","_id":false}],default:true}]
-				regtag=[{label:"字段",s_label:"s_field",type:"tpl_list_local",url:"/admin/site_management/getfields",default:{{.field}}},{label:"正则",s_label:"s_rule",type:"tpl_text",must:true}]
-				luatag=[{label:"字段",s_label:"s_field",type:"tpl_list_local",url:"/admin/site_management/getfields",default:{{.field}}},{label:"脚本",s_label:"s_luascript",type:"tpl_text",must:true}]
+				regtag=[{label:"字段",s_label:"s_field",type:"tpl_list_local",url:"/admin/getfields",default:{{.field}}},{label:"正则",s_label:"s_rule",type:"tpl_text",must:true}]
+				luatag=[{label:"字段",s_label:"s_field",type:"tpl_list_local",url:"/admin/getfields",default:{{.field}}},{label:"脚本",s_label:"s_luascript",type:"tpl_text",must:true}]
 				testcon=[{label:"测试内容",s_label:"s_testcon",type:"tpl_text",must:true}]
 				hiddentag=[{s_label:"_id",type:"tpl_hidden"},{s_label:"vid",type:"tpl_hidden"},{s_label:"pid",type:"tpl_hidden"},{s_label:"sid",type:"tpl_hidden"},{s_label:"s_type",type:"tpl_hidden"}]
 				islua=false

+ 11 - 2
src/web/templates/admin/version.html

@@ -92,6 +92,15 @@
 							</select>
 						</div>
 					</div>
+					<div class="form-group">
+						<label for="code" id="fieldname" class="col-sm-2 control-label">克隆站点:</label>
+						<div class="col-sm-10">
+							<select class="form-control" id="iscopysite">
+								<option value=false>否</option>
+								<option value=true>是</option>
+							</select>
+						</div>
+					</div>
 					<div class="form-group">
 						<label for="code" id="fieldname" class="col-sm-2 control-label">抽取附件:</label>
 						<div class="col-sm-10">
@@ -153,7 +162,7 @@ $(function () {
 		"columns": [
 				{ "data": "version","width":"5%"},
 			{ "data": "s_username","width":"5%"},
-			{ "data": "s_descript","width":"27%"},
+			{ "data": "s_descript","width":"25%"},
 			{ "data": "isuse","width":"7%",render:function(val,a,row){
 				tmp=""
 				if(val){
@@ -355,7 +364,7 @@ function save(){
         data:{"_id":_id,"s_descript":s_descript,"version":version,"isextractcity":isextractcity,"s_pversionid":s_pversionid,"iscopyfiled":iscopyfiled,"iscopysite":iscopysite,"isfiles":isfiles,"s_filefileds":s_filefiledsJsonStr},
         success:function(r){
 			if(r&&r.rep){
-				window.location.href="/admin/version"
+				// window.location.href="/admin/version"
 			}else{
 				showTip("已存在!",1000)
 			}