Sfoglia il codice sorgente

站点标签库获取异常

fengweiqiang 5 anni fa
parent
commit
016dd15428

+ 3 - 3
src/jy/admin/rulecheck.go

@@ -307,7 +307,7 @@ func checkCoreReg(field, content, ruleText string) map[string]string {
 //lua脚本前置过滤验证
 func checkPreScript(code, name, infoid, script string) map[string]interface{} {
 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j, _,_ := extract.PreInfo(*doc)
+	j, _,_,_ := extract.PreInfo(*doc)
 	delete(*j.Data, "contenthtml")
 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
 	lua.Block = j.Block
@@ -331,7 +331,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 	e.InitTag(false)
 	e.InitTag(true)
 	tmp, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j, _,_ := extract.PreInfo(*tmp)
+	j, _,_,_ := extract.PreInfo(*tmp)
 	doc := *j.Data
 	//全局前置规则,结果覆盖doc属性
 	for _, v := range e.RulePres {
@@ -394,7 +394,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 //lua脚本抽取验证
 func checkCoreScript(code, name, infoid, script string) interface{} {
 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j, _,_ := extract.PreInfo(*doc)
+	j, _,_,_ := extract.PreInfo(*doc)
 	delete(*j.Data, "contenthtml")
 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
 	lua.Block = j.Block

+ 4 - 3
src/jy/extract/exportask.go

@@ -78,14 +78,15 @@ func extractAndExport(v string, t map[string]interface{}) {
 		}
 		var j, jf *ju.Job
 		var isSite bool
+		var codeSite string
 		if e.IsFileField && v["projectinfo"] != nil {
 			v["isextFile"] = true
-			j, jf,isSite = e.PreInfo(v)
+			j, jf,isSite,codeSite = e.PreInfo(v)
 		} else {
-			j, _,isSite = e.PreInfo(v)
+			j, _,isSite,codeSite = e.PreInfo(v)
 		}
 		e.TaskInfo.ProcessPool <- true
-		go e.ExtractProcess(j, jf,isSite)
+		go e.ExtractProcess(j, jf,isSite,codeSite)
 	}
 }
 

+ 11 - 11
src/jy/extract/extpackage.go

@@ -10,20 +10,20 @@ import (
 	"sort"
 )
 
-func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask,isSite bool) {
+func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask,isSite bool,codeSite string) {
 
 	if pkg.ColonKV != nil {
-		kvparse(pkg.ColonKV,  e, sonJobResult,isSite)
+		kvparse(pkg.ColonKV,  e, sonJobResult,isSite,codeSite)
 	}
 	if pkg.TableKV != nil {
-		kvparse(pkg.TableKV,  e, sonJobResult,isSite)
+		kvparse(pkg.TableKV,  e, sonJobResult,isSite,codeSite)
 	}
 	if pkg.SpaceKV != nil {
-		kvparse(pkg.SpaceKV,  e, sonJobResult,isSite)
+		kvparse(pkg.SpaceKV,  e, sonJobResult,isSite,codeSite)
 	}
 }
 
-func kvparse(p *ju.JobKv,  e *ExtractTask, sonJobResult *map[string]interface{},isSite bool) {
+func kvparse(p *ju.JobKv,  e *ExtractTask, sonJobResult *map[string]interface{},isSite bool,codeSite string) {
 	if p != nil {
 		for pk, pv2 := range p.KvTags {
 			if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额") {
@@ -43,7 +43,7 @@ func kvparse(p *ju.JobKv,  e *ExtractTask, sonJobResult *map[string]interface{},
 			if len(pv) == 0 {
 				continue
 			}
-			tags := ju.GetTags(pk,isSite)
+			tags := ju.GetTags(pk,isSite,codeSite)
 			if tags.Len() > 0 {
 				if ((*sonJobResult)["name"]  == nil || (*sonJobResult)["name"] == "")&& tags[0].Key == "项目名称"{
 					(*sonJobResult)["name"] = pv[0].Value
@@ -77,7 +77,7 @@ func kvparse(p *ju.JobKv,  e *ExtractTask, sonJobResult *map[string]interface{},
 }
 
 //处理分包信息
-func PackageDetail(j *ju.Job, e *ExtractTask,isSite bool) {
+func PackageDetail(j *ju.Job, e *ExtractTask,isSite bool,codeSite string) {
 	qu.Try(func() {
 		if len(j.BlockPackage) > 0 {
 			tmpkeys := []string{}
@@ -124,15 +124,15 @@ func PackageDetail(j *ju.Job, e *ExtractTask,isSite bool) {
 						}
 						sonJobResult["winnerorder"] = pkg.WinnerOrder
 					}
-					pkvdata(pkg, &sonJobResult, e,isSite)
+					pkvdata(pkg, &sonJobResult, e,isSite,codeSite)
 
 					sonJobResult["type"] = pkg.Type
 					if len(tmpkeys) == 1{
 						if qu.Float64All(sonJobResult["budget"])==0{
 							for _,bv := range j.Block{
-								kvparse(bv.ColonKV,e,&sonJobResult,isSite)
-								kvparse(bv.TableKV,e,&sonJobResult,isSite)
-								kvparse(bv.SpaceKV,e,&sonJobResult,isSite)
+								kvparse(bv.ColonKV,e,&sonJobResult,isSite,codeSite)
+								kvparse(bv.TableKV,e,&sonJobResult,isSite,codeSite)
+								kvparse(bv.SpaceKV,e,&sonJobResult,isSite,codeSite)
 							}
 						}
 					}

+ 34 - 23
src/jy/extract/extract.go

@@ -97,14 +97,15 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 			}
 			var j, jf *ju.Job
 			var isSite bool
+			var codeSite string
 			if ext.IsFileField && v["projectinfo"] != nil {
 				v["isextFile"] = true
-				j, jf, isSite = ext.PreInfo(v)
+				j, jf, isSite,codeSite = ext.PreInfo(v)
 			} else {
-				j, _, isSite = ext.PreInfo(v)
+				j, _, isSite,codeSite = ext.PreInfo(v)
 			}
 			ext.TaskInfo.ProcessPool <- true
-			go ext.ExtractProcess(j, jf, isSite)
+			go ext.ExtractProcess(j, jf, isSite,codeSite)
 		}
 		return true
 	} else {
@@ -214,14 +215,15 @@ func RunExtractTask(taskId string) {
 			}
 			var j, jf *ju.Job
 			var isSite bool
+			var codeSite string
 			if ext.IsFileField && v["projectinfo"] != nil {
 				v["isextFile"] = true
-				j, jf, isSite = ext.PreInfo(v)
+				j, jf, isSite,codeSite = ext.PreInfo(v)
 			} else {
-				j, _, isSite = ext.PreInfo(v)
+				j, _, isSite,codeSite = ext.PreInfo(v)
 			}
 			ext.TaskInfo.ProcessPool <- true
-			go ext.ExtractProcess(j, jf, isSite)
+			go ext.ExtractProcess(j, jf, isSite,codeSite)
 			ext.TaskInfo.LastExtId = _id
 		}
 		db.Mgo.UpdateById("task", ext.Id, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`)
@@ -234,12 +236,12 @@ func RunExtractTask(taskId string) {
 }
 
 //信息预处理-不和版本关联,取最新版本的配置项
-func PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool) {
+func PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool,codeSite string) {
 	return (&ExtractTask{}).PreInfo(doc)
 }
 
 //信息预处理-和版本关联
-func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool) {
+func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool,codeSite string) {
 	defer qu.Catch()
 	//判断是否有附件这个字段
 	var isextFile bool
@@ -324,22 +326,31 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		}
 	}
 	//是否配置站点
-	exp, isSite := Luacodes.Load(qu.ObjToString(doc["spidercode"]))
+	codeSite = qu.ObjToString(doc["spidercode"])
+	exp, isSite := Luacodes.Load(codeSite)
 	if isSite{
-		e.SiteClearFn = exp.( map[string]interface{})["e.SiteClearFn"].( map[string][]string)
-		e.SiteTag = exp.( map[string]interface{})["e.SiteTag"].( map[string][]*Tag)
-		e.SiteRuleCores = exp.( map[string]interface{})["e.SiteRuleCores"].(  map[string]map[string][]*RuleCore)
-		e.SiteRuleBacks = exp.( map[string]interface{})["e.SiteRuleBacks"].(  []*RegLuaInfo)
+		if exp.( map[string]interface{})["e.SiteClearFn"]!= nil{
+			e.SiteClearFn = exp.( map[string]interface{})["e.SiteClearFn"].( map[string][]string)
+		}
+		if exp.( map[string]interface{})["e.SiteTag"]!= nil{
+			e.SiteTag = exp.( map[string]interface{})["e.SiteTag"].( map[string][]*Tag)
+		}
+		if  exp.( map[string]interface{})["e.SiteRuleCores"] != nil{
+			e.SiteRuleCores = exp.( map[string]interface{})["e.SiteRuleCores"].(  map[string]map[string][]*RuleCore)
+		}
+		if  exp.( map[string]interface{})["e.SiteRuleBacks"]!= nil{
+			e.SiteRuleBacks = exp.( map[string]interface{})["e.SiteRuleBacks"].(  []*RegLuaInfo)
+		}
 	}
 	qu.Try(func() {
-		pretreated.AnalyStart(j, isSite) //job.Block分块
+		pretreated.AnalyStart(j, isSite,codeSite) //job.Block分块
 		if isextFile {
-			pretreated.AnalyStart(jf, isSite)
+			pretreated.AnalyStart(jf, isSite,codeSite)
 		}
 	}, func(err interface{}) {
 		log.Debug("pretreated.AnalyStart", err, j.SourceMid)
 	})
-	return j, jf, isSite
+	return j, jf, isSite,codeSite
 }
 
 //遍历附件字段内容,拼接在一起;附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
@@ -375,17 +386,17 @@ func file2text(doc *map[string]interface{}) {
 }
 
 //抽取
-func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
-	e.ExtractDetail(j, isSite)
+func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool,codeSite string) {
+	e.ExtractDetail(j, isSite,codeSite)
 	if jf != nil && jf.IsFile {
-		e.ExtractFile(jf, isSite)
+		e.ExtractFile(jf, isSite,codeSite)
 	}
 	//分析抽取结果并保存 todo
 	AnalysisSaveResult(j, jf, e)
 	<-e.TaskInfo.ProcessPool
 }
 
-func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool) {
+func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool,codeSite string) {
 	qu.Try(func() {
 		doc := *j.Data
 		//全局前置规则,结果覆盖doc属性
@@ -513,14 +524,14 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool) {
 				lockclear.Unlock()
 			}
 		}
-		PackageDetail(j, e, isSite) //处理分包信息
+		PackageDetail(j, e, isSite,codeSite) //处理分包信息
 		//		bs, _ := json.Marshal(j.Result)
 		//		 log.Debug("抽取结果", j.Title, j.SourceMid, string(bs))
 	}, func(err interface{}) {
 		log.Debug("ExtractProcess err", err)
 	})
 }
-func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool) {
+func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool,codeSite string) {
 	qu.Try(func() {
 		doc := *j.Data
 		//全局前置规则,结果覆盖doc属性
@@ -599,7 +610,7 @@ func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool) {
 			}
 		}
 
-		PackageDetail(j, e, isSite) //处理分包信息
+		PackageDetail(j, e, isSite,codeSite) //处理分包信息
 		//		bs, _ := json.Marshal(j.Result)
 		//		 log.Debug("抽取结果", j.Title, j.SourceMid, string(bs))
 	}, func(err interface{}) {

+ 36 - 16
src/jy/extract/extractInit.go

@@ -697,27 +697,47 @@ func (e *ExtractTask) InitTag(isSite bool) {
 			sort.Sort(tab.Items)
 			//ju.TagdbTable[fname] = &tab
 			if isSite {
-				ju.SiteTagdbTable.Store(fname, &tab)
-			} else {
-				ju.TagdbTable.Store(fname, &tab)
-			}
-		}
-		if isSite {
-			sm, _ := db.Mgo.FindById("site_management", qu.ObjToString(v["pid"]), bson.M{"site_script": 1})
-			for _, v2 := range (*sm)["site_script"].([]interface{}) {
-				if mdpvalue, ok := Luacodes.Load(v2); ok {
-					if mdpvalue.(map[string]interface{})["e.SiteTag"] == nil{
-						mdpvalue.(map[string]interface{})["e.SiteTag"] = eSiteTag
-					}else {
-						for k2,v2 := range eSiteTag{
-							mdpvalue.(map[string]interface{})["e.SiteTag"].(map[string][]*Tag)[k2] = v2
+				sm, _ := db.Mgo.FindById("site_management", qu.ObjToString(v["pid"]), bson.M{"site_script": 1})
+				for _, v2 := range (*sm)["site_script"].([]interface{}) {
+					if  v2 == nil || v2 == "" {
+						continue
+					}
+					if mdpvalue, ok := Luacodes.Load(v2); ok {
+						if mdpvalue.(map[string]interface{})["e.SiteTag"] == nil{
+							mdpvalue.(map[string]interface{})["e.SiteTag"] = eSiteTag
+						}else {
+							for k2,v2 := range eSiteTag{
+								mdpvalue.(map[string]interface{})["e.SiteTag"].(map[string][]*Tag)[k2] = v2
+							}
 						}
+						Luacodes.Store(v2, mdpvalue)
 					}
-					Luacodes.Store(v2, mdpvalue)
+					var tmpMap sync.Map
+					tmpMap.Store(fname,&tab)
+					ju.SiteTagdbTable.Store(v2,tmpMap)
 				}
+				//ju.SiteTagdbTable.Store(fname, &tab)
+				eSiteTag = map[string][]*Tag{}
+			} else {
+				ju.TagdbTable.Store(fname, &tab)
 			}
-			eSiteTag = map[string][]*Tag{}
 		}
+		//if isSite {
+		//	sm, _ := db.Mgo.FindById("site_management", qu.ObjToString(v["pid"]), bson.M{"site_script": 1})
+		//	for _, v2 := range (*sm)["site_script"].([]interface{}) {
+		//		if mdpvalue, ok := Luacodes.Load(v2); ok {
+		//			if mdpvalue.(map[string]interface{})["e.SiteTag"] == nil{
+		//				mdpvalue.(map[string]interface{})["e.SiteTag"] = eSiteTag
+		//			}else {
+		//				for k2,v2 := range eSiteTag{
+		//					mdpvalue.(map[string]interface{})["e.SiteTag"].(map[string][]*Tag)[k2] = v2
+		//				}
+		//			}
+		//			Luacodes.Store(v2, mdpvalue)
+		//		}
+		//	}
+		//	eSiteTag = map[string][]*Tag{}
+		//}
 	}
 	//正则标签库
 	list, _ = db.Mgo.Find(tagdetailinfodb, `{"s_type":"reg","s_version":"`+e.TaskInfo.Version+`","delete":false}`, nil, nil, false, -1, -1)

+ 12 - 9
src/jy/extract/extractudp.go

@@ -158,14 +158,15 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				}
 				var j, jf *ju.Job
 				var isSite bool
+				var codeSite string
 				if ext.IsFileField && v["projectinfo"] != nil {
 					v["isextFile"] = true
-					j, jf,isSite = ext.PreInfo(v)
+					j, jf,isSite,codeSite = ext.PreInfo(v)
 				} else {
-					j, _,isSite = ext.PreInfo(v)
+					j, _,isSite,codeSite = ext.PreInfo(v)
 				}
 				ext.TaskInfo.ProcessPool <- true
-				go ext.ExtractProcess(j, jf,isSite)
+				go ext.ExtractProcess(j, jf,isSite,codeSite)
 				index++
 			}
 			list2, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl+"_back", query, nil, Fields, false, -1, -1)
@@ -175,14 +176,15 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				}
 				var j, jf *ju.Job
 				var isSite bool
+				var codeSite string
 				if ext.IsFileField && v["projectinfo"] != nil {
 					v["isextFile"] = true
-					j, jf,isSite = ext.PreInfo(v)
+					j, jf,isSite,codeSite = ext.PreInfo(v)
 				} else {
-					j, _,isSite = ext.PreInfo(v)
+					j, _,isSite,codeSite = ext.PreInfo(v)
 				}
 				ext.TaskInfo.ProcessPool <- true
-				go ext.ExtractProcess(j, jf,isSite)
+				go ext.ExtractProcess(j, jf,isSite,codeSite)
 				index++
 			}
 			db.Mgo.UpdateById("esctask", (*tsk)["_id"], map[string]interface{}{
@@ -223,18 +225,19 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				_id := qu.BsonIdToSId(v["_id"])
 				var j, jf *ju.Job
 				var isSite bool
+				var codeSite string
 				if ext.IsFileField && v["projectinfo"] != nil {
 					v["isextFile"] = true
-					j, jf,isSite = ext.PreInfo(v)
+					j, jf,isSite,codeSite = ext.PreInfo(v)
 				} else {
-					j, _,isSite = ext.PreInfo(v)
+					j, _,isSite,codeSite = ext.PreInfo(v)
 				}
 				ext.TaskInfo.ProcessPool <- true
 				wg.Add(1)
 				go func(wg *sync.WaitGroup, j, jf *ju.Job) {
 					defer wg.Done()
 					//log.Debug(index,j.SourceMid,)
-					ext.ExtractProcess(j, jf,isSite)
+					ext.ExtractProcess(j, jf,isSite,codeSite)
 				}(&wg, j, jf)
 				index++
 				if index%1000 == 0 {

+ 2 - 2
src/jy/pretreated/analykv.go

@@ -23,7 +23,7 @@ var matchkh = map[string]string{
 	"〖": "〗",
 }
 
-func GetKvFromtxt(con, tag string, from int,isSite bool) ([]*u.Kv, map[string][]*u.Tag) {
+func GetKvFromtxt(con, tag string, from int,isSite bool,codeSite string) ([]*u.Kv, map[string][]*u.Tag) {
 	res := FindKv(TextAfterRemoveTable(con), tag, from)
 	kvs := []*u.Kv{}
 	for _, k := range res.Keys {
@@ -35,7 +35,7 @@ func GetKvFromtxt(con, tag string, from int,isSite bool) ([]*u.Kv, map[string][]
 			})
 		}
 	}
-	kvTags := GetKvTags(kvs, tag, nil,isSite)
+	kvTags := GetKvTags(kvs, tag, nil,isSite,codeSite)
 	return kvs, kvTags
 }
 

+ 19 - 19
src/jy/pretreated/analystep.go

@@ -12,7 +12,7 @@ import (
 	"github.com/PuerkitoBio/goquery"
 )
 
-func AnalyStart(job *util.Job,isSite bool) {
+func AnalyStart(job *util.Job,isSite bool,codeSite string) {
 	con := job.Content
 	//全文的需要修复表格
 	con = RepairCon(con)
@@ -29,23 +29,23 @@ func AnalyStart(job *util.Job,isSite bool) {
 			ration = newration
 		}
 	}
-	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock,isSite) //分块
+	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock,isSite,codeSite) //分块
 	if len(blockArrays) > 0 { //有分块
 		//从块里面找分包
-		job.BlockPackage = FindPackageFromBlocks(&blockArrays,isSite) //从块里面找分包
+		job.BlockPackage = FindPackageFromBlocks(&blockArrays,isSite,codeSite) //从块里面找分包
 		for _, bl := range blockArrays {
 			//log.Println(bl.Text)
 			if len([]rune(bl.Text)) > 80 {
-				bl.Block, _ = DivideBlock(job.CategorySecond, bl.Text, 1, job.RuleBlock,isSite)
+				bl.Block, _ = DivideBlock(job.CategorySecond, bl.Text, 1, job.RuleBlock,isSite,codeSite)
 				for _, bl_bl := range bl.Block {
-					processTableInBlock(bl_bl, job, false,isSite)
+					processTableInBlock(bl_bl, job, false,isSite,codeSite)
 				}
 			}
 			FindProjectCode(bl.Text, job) //匹配项目编号
-			processTableInBlock(bl, job, true,isSite)
+			processTableInBlock(bl, job, true,isSite,codeSite)
 			//新加 未分块table中未能解析到中标候选人,从正文中解析
 			if job.Winnerorder == nil || len(job.Winnerorder) == 0 {
-				bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1,isSite)
+				bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1,isSite,codeSite)
 				job.Winnerorder = bl.Winnerorder
 			}
 			job.Block = append(job.Block, bl)
@@ -57,33 +57,33 @@ func AnalyStart(job *util.Job,isSite bool) {
 		if len(tabs) > 0 { //解析表格逻辑
 			job.HasTable = 1 //添加标识:文本中有table
 			newCon = TextAfterRemoveTable(con)
-			job.BlockPackage = FindPackageFromText(job.Title, newCon,isSite)
+			job.BlockPackage = FindPackageFromText(job.Title, newCon,isSite,codeSite)
 			for i := 0; i < len(tabs); i++ {
 				//log.Println(tabs[i].Text())
 				//添加标识:文本中有table
-				tabres := AnalyTableV2(tabs[i], job.Category, "", con, 1, job.SourceMid, job.RuleBlock,isSite) //解析表格入口 返回:汇总表格对象
-				processTableResult(tabres, bl, job,isSite)
+				tabres := AnalyTableV2(tabs[i], job.Category, "", con, 1, job.SourceMid, job.RuleBlock,isSite,codeSite) //解析表格入口 返回:汇总表格对象
+				processTableResult(tabres, bl, job,isSite,codeSite)
 			}
 		} else {
 			//从正文里面找分包
-			job.BlockPackage = FindPackageFromText(job.Title, newCon,isSite)
+			job.BlockPackage = FindPackageFromText(job.Title, newCon,isSite,codeSite)
 		}
 		bl.Text = HtmlToText(con)
 		//log.Println(bl.Text)
 		FindProjectCode(bl.Text, job) //匹配项目编号
 		//调用kv解析
-		bl.ColonKV = GetKVAll(bl.Text, "", nil, 1,isSite)
-		bl.SpaceKV = SspacekvEntity.Entrance(bl.Text, "", nil,isSite)
+		bl.ColonKV = GetKVAll(bl.Text, "", nil, 1,isSite,codeSite)
+		bl.SpaceKV = SspacekvEntity.Entrance(bl.Text, "", nil,isSite,codeSite)
 		//新加 未分块table中未能解析到中标候选人,从正文中解析
 		if job.Winnerorder == nil || len(job.Winnerorder) == 0 {
-			bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1,isSite)
+			bl.Winnerorder = winnerOrderEntity.Find(bl.Text, true, 1,isSite,codeSite)
 			job.Winnerorder = bl.Winnerorder
 		}
 		job.Block = append(job.Block, bl)
 	}
 }
 
-func processTableInBlock(bl *util.Block, job *util.Job, packageFlag,isSite bool) {
+func processTableInBlock(bl *util.Block, job *util.Job, packageFlag,isSite bool,codeSite string) {
 	//块中再查找表格(块,处理完把值赋到块)
 	tabs, _ := ComputeConRatio(bl.Text, 2)
 	for _, tab := range tabs {
@@ -93,12 +93,12 @@ func processTableInBlock(bl *util.Block, job *util.Job, packageFlag,isSite bool)
 			tmptag = strings.TrimSpace(tab.Nodes[0].PrevSibling.Data)
 		}
 		//添加标识:文本中有table
-		tabres := AnalyTableV2(tab, job.Category, tmptag, tab.Text(), 2, job.SourceMid, job.RuleBlock,isSite) //解析表格入口 返回:汇总表格对象
+		tabres := AnalyTableV2(tab, job.Category, tmptag, tab.Text(), 2, job.SourceMid, job.RuleBlock,isSite,codeSite) //解析表格入口 返回:汇总表格对象
 		if packageFlag {
 			tabres.PackageMap = nil
 			tabres.IsMultiPackage = false
 		}
-		processTableResult(tabres, bl, job,isSite) //分析table解析结果
+		processTableResult(tabres, bl, job,isSite,codeSite) //分析table解析结果
 		if bl.Title == "" && tabres.BlockTag != "" {
 			bl.Title = tabres.BlockTag
 		}
@@ -171,7 +171,7 @@ func FindProjectCode(newCon string, job *util.Job) {
 }
 
 //分析table解析结果
-func processTableResult(tabres *TableResult, block *util.Block, job *util.Job,isSite bool) {
+func processTableResult(tabres *TableResult, block *util.Block, job *util.Job,isSite bool,codeSite string) {
 	//解析结果中的kv
 	if block.TableKV == nil {
 		block.TableKV = util.NewJobKv()
@@ -203,7 +203,7 @@ func processTableResult(tabres *TableResult, block *util.Block, job *util.Job,is
 			} else {
 				blockPackage.TableKV = util.NewJobKv()
 			}
-			MergeKvTags(blockPackage.TableKV.KvTags, GetKvTags(labelKVs, "", nil,isSite))
+			MergeKvTags(blockPackage.TableKV.KvTags, GetKvTags(labelKVs, "", nil,isSite,codeSite))
 			tablePackage[v] = blockPackage
 		}
 	}

+ 60 - 60
src/jy/pretreated/analytable.go

@@ -122,7 +122,7 @@ func IsHide(g *goquery.Selection) (b bool) {
 
 //对表格的key进行标准化处理,多个k相同时,出现覆盖问题
 //待扩展,暂不支持正则标签库
-func CommonDataAnaly(k, tabletag, tabledesc string, v interface{},isSite bool) (kvTags map[string][]*u.Tag, returntag string) {
+func CommonDataAnaly(k, tabletag, tabledesc string, v interface{},isSite bool,codeSite string) (kvTags map[string][]*u.Tag, returntag string) {
 	kvTags = map[string][]*u.Tag{}
 	v1 := ""
 	if sv, sok := v.(string); sok { //取KV
@@ -141,9 +141,9 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{},isSite bool) (
 	k1 := ClearKey(k, 2)
 	//u.Debug(2, k)
 	//取标准key
-	res := u.GetTags(k1,isSite)
+	res := u.GetTags(k1,isSite,codeSite)
 	if len(res) == 0 && k1 != k {
-		res = u.GetTags(k,isSite)
+		res = u.GetTags(k,isSite,codeSite)
 		k1 = k
 	}
 	//log.Println(k, res)
@@ -202,7 +202,7 @@ func CommonDataAnaly(k, tabletag, tabledesc string, v interface{},isSite bool) (
 }
 
 //对解析后的表格的kv进行过滤
-func (table *Table) KVFilter(isSite bool) {
+func (table *Table) KVFilter(isSite bool,codeSite string) {
 	//1.标准化值查找
 	//2.对数组的处理
 	//3.对分包的处理
@@ -230,7 +230,7 @@ func (table *Table) KVFilter(isSite bool) {
 			if k == `中标价(万元)\费率(%)`{
 				k = "中标价(万元)"
 			}
-			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v,isSite) //对key标准化处理,没有找到会走中标
+			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v,isSite,codeSite) //对key标准化处理,没有找到会走中标
 			//qutil.Debug(k, v, k1, w1, v1, tag, b)
 			if tag != "" && table.Tag == "" {
 				table.Tag = tag
@@ -242,7 +242,7 @@ func (table *Table) KVFilter(isSite bool) {
 		}
 	}
 	//处理值是数组的kv放入标准化kv中//处理table.SortKV.value为数组的情况
-	table.sortKVArr(as,isSite)
+	table.sortKVArr(as,isSite,codeSite)
 	//
 	if len(table.WinnerOrder) > 0 || !table.BPackage {
 		winnerOrder := []map[string]interface{}{}
@@ -274,7 +274,7 @@ func (table *Table) KVFilter(isSite bool) {
 	L: //遍历每个td,查询中标人
 		for _, tr := range table.TRs {
 			for _, td := range tr.TDs {
-				winnerOrder = winnerOrderEntity.Find(td.Val, true, 3,isSite)
+				winnerOrder = winnerOrderEntity.Find(td.Val, true, 3,isSite,codeSite)
 				if len(winnerOrder) > 0 {
 					break L
 				}
@@ -305,7 +305,7 @@ func (table *Table) KVFilter(isSite bool) {
 }
 
 //处理table.SortKV.value为数组的情况
-func (table *Table) sortKVArr(as *SortMap,isSite bool) {
+func (table *Table) sortKVArr(as *SortMap,isSite bool,codeSite string) {
 	winnertag := iswinnertabletag.MatchString(table.Tag) && !nswinnertabletag.MatchString(table.Tag) //table标签
 	if !winnertag {
 		winnertag = iswinnertabletag.MatchString(table.TableResult.BlockTag) && !nswinnertabletag.MatchString(table.TableResult.BlockTag) //块标签
@@ -340,7 +340,7 @@ func (table *Table) sortKVArr(as *SortMap,isSite bool) {
 							res, _, _, _, repl := CheckCommon(k, "bidorder")
 							kv := ""
 							if !res {
-								kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), ""),isSite)
+								kt := u.GetTags(filterThText.ReplaceAllString(ClearKey(k, 2), ""),isSite,codeSite)
 								if kt.Len() > 0 {
 									kv = kt[0].Value
 								}
@@ -444,7 +444,7 @@ func (table *Table) sortKVArr(as *SortMap,isSite bool) {
 					}
 				}
 			}
-			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v,isSite)
+			kvTags, tag := CommonDataAnaly(k, table.Tag, table.Desc, v,isSite,codeSite)
 			if tag != "" && table.Tag == "" {
 				table.Tag = tag
 			}
@@ -612,7 +612,7 @@ func (table *Table) MergerToTableresult() {
 解析表格入口
 返回:汇总表格对象
 **/
-func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock,isSite bool) (tabres *TableResult) {
+func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype int, _id interface{}, ruleBlock *u.RuleBlock,isSite bool,codeSite string) (tabres *TableResult) {
 	defer qutil.Catch()
 	//u.Debug(con)
 	if itype == 1 {
@@ -630,12 +630,12 @@ func AnalyTableV2(tabs *goquery.Selection, toptype, blockTag, con string, itype
 	tabres.GoqueryTabs = tabs
 	//}
 	//解析表格集
-	tabres.Analy(isSite)
+	tabres.Analy(isSite,codeSite)
 	return
 }
 
 //开始解析表格集
-func (ts *TableResult) Analy(isSite bool) {
+func (ts *TableResult) Analy(isSite bool,codeSite string) {
 	tabs := []*Table{}
 	contactFormat := &u.ContactFormat{
 		IndexMap: map[int]string{},
@@ -644,7 +644,7 @@ func (ts *TableResult) Analy(isSite bool) {
 	//for _, table := range ts.GoqueryTabs {
 	tn := NewTable(ts.Html, ts, ts.GoqueryTabs)
 	//核心模块
-	tsw := tn.Analy(contactFormat,isSite)
+	tsw := tn.Analy(contactFormat,isSite,codeSite)
 	for _, tab := range tsw {
 		if len(tab.TRs) > 0 {
 			tabs = append(tabs, tab)
@@ -742,23 +742,23 @@ func (ts *TableResult) Analy(isSite bool) {
 }
 
 //解析表格
-func (table *Table) Analy(contactFormat *u.ContactFormat,isSite bool) []*Table {
+func (table *Table) Analy(contactFormat *u.ContactFormat,isSite bool,codeSite string) []*Table {
 	//查找表体中的tr对象
 	trs := table.Goquery.ChildrenFiltered("tbody,thead,tfoot").ChildrenFiltered("tr")
 	if trs.Size() == 0 {
 		trs = table.Goquery.ChildrenFiltered("tr")
 	}
 	//遍历节点,初始化table 结构
-	table.createTabe(trs,isSite)
+	table.createTabe(trs,isSite,codeSite)
 	//重置行列
 	table.ComputeRowColSpan()
 	//对table结构体进行整体解析处理
-	ts := table.AnalyTables(contactFormat,isSite)
+	ts := table.AnalyTables(contactFormat,isSite,codeSite)
 	return ts
 }
 
 //遍历节点,初始化table 结构体
-func (table *Table) createTabe(trs *goquery.Selection,isSite bool) {
+func (table *Table) createTabe(trs *goquery.Selection,isSite bool,codeSite string) {
 	trs.Each(func(n int, sel *goquery.Selection) {
 		//隐藏行不处理
 		if IsHide(sel) {
@@ -775,7 +775,7 @@ func (table *Table) createTabe(trs *goquery.Selection,isSite bool) {
 				return
 			}
 			//进入每一个单元格
-			td := NewTD(selm, TR, table,isSite) //初始化td,kv处理,td中有table处理,td的方向
+			td := NewTD(selm, TR, table,isSite,codeSite) //初始化td,kv处理,td中有table处理,td的方向
 			//num++
 			TR.AddTD(td)
 			if td.Val == "" && td.SonTableResult == nil && len(td.SortKV.Map) == 0 { //删除一个tr,tr中所有td是空值的
@@ -793,7 +793,7 @@ func (table *Table) createTabe(trs *goquery.Selection,isSite bool) {
 }
 
 //对table进行整体解析处理
-func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool) []*Table {
+func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool,codeSite string) []*Table {
 	ts := tn.tableSubDemolitionTable() //分包,拆表
 	for n, table := range ts {
 		//处理每个table
@@ -802,15 +802,15 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool) []*Tabl
 			table.deleteTrimTr()
 			//table.Print()
 			//校对表格
-			table.Adjust(isSite)
+			table.Adjust(isSite,codeSite)
 			//查找表格的标签,table.Tag字段
 			table.FindTag()
 			//log.Println(table.TableResult.Id, table.Html)
 			//分割表格
-			table.bSplit(n, ts,isSite)
-			table.TdContactFormat(contactFormat,isSite) //contactFormat,处理采购单位,代理机构
+			table.bSplit(n, ts,isSite,codeSite)
+			table.TdContactFormat(contactFormat,isSite,codeSite) //contactFormat,处理采购单位,代理机构
 			//开始查找kv,核心模块,table.SortKV
-			table.FindKV(isSite)
+			table.FindKV(isSite,codeSite)
 			//table中抽取品牌,table.BrandData
 			if u.IsBrandGoods {
 				table.analyBrand()
@@ -818,7 +818,7 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool) []*Tabl
 			res, _, _, _, _ := CheckCommon(table.Tag, "abandontable")
 			if !res {
 				//过滤、标准化、合并kv,table.StandKV,table.StandKVWeight
-				table.KVFilter(isSite)
+				table.KVFilter(isSite,codeSite)
 			}
 			//对没有表头表格的处理
 			if table.Tag != "" {
@@ -848,7 +848,7 @@ func (tn *Table) AnalyTables(contactFormat *u.ContactFormat,isSite bool) []*Tabl
 				}
 			}
 			//判断是否是多包,并处理分包的//遍历td分块
-			table.CheckMultiPackageByTable(isSite)
+			table.CheckMultiPackageByTable(isSite,codeSite)
 			//MergeKvTags(table.TableResult.KvTags, table.StandKV)
 		}
 	}
@@ -913,7 +913,7 @@ func (table *Table) tableSubDemolitionTable() []*Table {
 }
 
 //分割表格
-func (table *Table) bSplit(n int, ts []*Table,isSite bool) {
+func (table *Table) bSplit(n int, ts []*Table,isSite bool,codeSite string) {
 	if table.BSplit {
 		if !table.BHeader && n > 0 {
 			for i := n - 1; i > -1; i-- {
@@ -921,7 +921,7 @@ func (table *Table) bSplit(n int, ts []*Table,isSite bool) {
 					if ts[i].BFirstRow {
 						//取第一行插入到
 						table.InsertTR(ts[i].TRs[0])
-						table.Adjust(isSite)
+						table.Adjust(isSite,codeSite)
 					}
 					break
 				}
@@ -952,7 +952,7 @@ func (table *Table) deleteTrimTr() {
 }
 
 //校对表格
-func (table *Table) Adjust(isSite bool) {
+func (table *Table) Adjust(isSite bool,codeSite string) {
 	//计算行列起止位置,跨行跨列处理
 	table.ComputeRowColSpan()
 	//	for k1, tr := range table.TRs {
@@ -987,7 +987,7 @@ func (table *Table) Adjust(isSite bool) {
 	}
 	if float32(count)/float32(table.TDNum) < 0.85 {
 		//精确计算起止行列是表头的概率
-		table.ComputeRowColIsKeyRation(isSite)
+		table.ComputeRowColIsKeyRation(isSite,codeSite)
 		bhead := false
 	L:
 		for i, tr := range table.TRs {
@@ -999,7 +999,7 @@ func (table *Table) Adjust(isSite bool) {
 						if res {
 							//删除此行
 							table.TRs = table.TRs[:len(table.TRs)-1]
-							table.Adjust(isSite)
+							table.Adjust(isSite,codeSite)
 							return
 						}
 					}
@@ -1118,7 +1118,7 @@ func (table *Table) GetKeyRation() {
 }
 
 //计算行列是表头的概率调用GetKeyRation
-func (table *Table) ComputeRowColIsKeyRation(isSite bool) {
+func (table *Table) ComputeRowColIsKeyRation(isSite bool,codeSite string) {
 	//增加对跨行校正限止
 	//	u.Debug(table.Brule, table.ColNum, table.RowNum, table.TDNum)
 	bkeyfirstrow := false
@@ -1357,7 +1357,7 @@ func (table *Table) ComputeRowColIsKeyRation(isSite bool) {
 				tr.TDs[0].BH = false
 				tr.TDs[0].KVDirect = 0
 				sv := FindKv(tr.TDs[0].Val, "", 2)
-				_, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", nil, 2,isSite)
+				_, resm := colonkvEntity.entrance(tr.TDs[0].Val, "", nil, 2,isSite,codeSite)
 				for k, v := range resm {
 					sv.AddKey(k, v)
 				}
@@ -1392,7 +1392,7 @@ func (table *Table) ComputeRowColIsKeyRation(isSite bool) {
 }
 
 //查找表格的kv,调用FindTdVal
-func (table *Table) FindKV(isSite bool) {
+func (table *Table) FindKV(isSite bool,codeSite string) {
 	//判断全是key的表格不再查找
 	if table.BHeader { //只要一个是key即为true
 		direct := If(table.BFirstRow, 2, 1).(int) //kv,2查找方向,向上查找
@@ -1468,7 +1468,7 @@ func (table *Table) FindKV(isSite bool) {
 			for n, r := range r1 {
 				if len([]rune(r)) < 60 { // 长度小于60才去分
 					//res1, _ := GetKVAll(r, "", nil)
-					res1, _ := colonkvEntity.entrance(r, "", nil, 2,isSite)
+					res1, _ := colonkvEntity.entrance(r, "", nil, 2,isSite,codeSite)
 					if res1 != nil {
 						nmap[n] = res1
 						nmapkeys = append(nmapkeys, n)
@@ -1900,7 +1900,7 @@ func (tn *Table) GetTdByRCNo(row, col int) *TD {
 }
 
 //判断表格是否是分包
-func (tn *Table) CheckMultiPackageByTable(isSite bool) (b bool, index []string) {
+func (tn *Table) CheckMultiPackageByTable(isSite bool,codeSite string) (b bool, index []string) {
 	pac := 0             //包的数量
 	val := 0             //分值
 	index = []string{}   //存储分包,使用tbale.SortKV的key和value使用正则等处理对值进行判断
@@ -1972,20 +1972,20 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool) (b bool, index []string)
 					tn.BlockPackage.AddKey(v, bp) //table子包数组
 				}
 			}
-			isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos,isSite) //多包处理,处理不同情况下的分包
+			isGoonNext = tn.manyPackageProcessByIndex(index, standIndex_pos,isSite,codeSite) //多包处理,处理不同情况下的分包
 		}
 	} else {
 		isGoonNext = true
 	}
 	if isGoonNext { //没有处理成数组的情况下,继续调用正文查找分包的方法
-		tn.isGoonNext(isSite)
+		tn.isGoonNext(isSite,codeSite)
 	}
 	//查找分包中的中标人排序
 	if tn.BlockPackage != nil && tn.BlockPackage.Keys != nil && len(tn.BlockPackage.Keys) > 0 {
 		for _, v := range tn.BlockPackage.Keys {
 			vv, ok := tn.BlockPackage.Map[v].(*u.BlockPackage)
 			if ok && (vv.WinnerOrder == nil || len(vv.WinnerOrder) == 0) {
-				vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2,isSite)
+				vv.WinnerOrder = winnerOrderEntity.Find(vv.Text, true, 2,isSite,codeSite)
 			}
 		}
 	}
@@ -1993,7 +1993,7 @@ func (tn *Table) CheckMultiPackageByTable(isSite bool) (b bool, index []string)
 }
 
 //多包处理,处理不同情况下的分包
-func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,isSite bool) (isGoonNext bool) {
+func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,isSite bool,codeSite string) (isGoonNext bool) {
 	if len(index) == 1 { //是一个的情况
 		if len(tn.SortKV.Keys) < 10 && tn.ColNum < 10 && tn.RowNum < 4 { //table带排序的KV值小于10并且小于10列和小于4行
 			beq := true
@@ -2034,7 +2034,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 		if val, bvs := v1.([]string); bvs {
 			if len(val) <= len(index) { //table.SortKV.Map.value数组小于等于分包index
 				for k, v := range val {
-					tn.assemblePackage(k1, v, index[k],isSite) //组装解析到的分包
+					tn.assemblePackage(k1, v, index[k],isSite,codeSite) //组装解析到的分包
 				}
 			} else {
 				for sk1, sv2 := range index {
@@ -2052,12 +2052,12 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 							}
 						}
 					}
-					tn.assemblePackage(k1, v, sv2,isSite)
+					tn.assemblePackage(k1, v, sv2,isSite,codeSite)
 				}
 			}
 			//删除子包的kv
 			//u.Debug("----==1==-------", k1)
-			k1tags := u.GetTags(k1,isSite) //取得匹配
+			k1tags := u.GetTags(k1,isSite,codeSite) //取得匹配
 			//if !(len(k1tags) > 0 && k1tags[0].Value == "采购单位") {
 			//	tn.SortKV.RemoveKey(k1)
 			//}
@@ -2067,7 +2067,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 			}
 		} else if val, bvs := v1.(string); bvs && len(index) == 1 {
 			//删除子包的kv
-			kvTags, _ := CommonDataAnaly(k1, "", "", val,isSite)
+			kvTags, _ := CommonDataAnaly(k1, "", "", val,isSite,codeSite)
 			for kvTag_k, kvTag_v := range kvTags {
 				hasValid := false
 				for _, kvTag_vv := range kvTag_v {
@@ -2081,7 +2081,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 				}
 				if !(len(kvTags) > 0 && regexp.MustCompile("^(项目|开标|采购单位|招标机构)").MatchString(kvTag_k)) {
 					tn.SortKV.RemoveKey(k1)
-					tn.assemblePackage(k1, val, index[0],isSite)
+					tn.assemblePackage(k1, val, index[0],isSite,codeSite)
 					//log.Println("remove", k1, val)
 				}
 			}
@@ -2093,7 +2093,7 @@ func (tn *Table) manyPackageProcessByIndex(index []string, standIndex_pos []int,
 }
 
 //没有处理成数组的情况下,继续调用正文查找分包的方法
-func (tn *Table) isGoonNext(isSite bool) {
+func (tn *Table) isGoonNext(isSite bool,codeSite string) {
 	blockPackage := map[string]*u.BlockPackage{}
 	for _, k := range tn.SortKV.Keys {
 		if excludeKey.MatchString(k) || strings.Contains(k, "批复") {
@@ -2107,7 +2107,7 @@ func (tn *Table) isGoonNext(isSite bool) {
 		} else {
 			str += fmt.Sprintf("%s:%s\n", nk, v)
 		}
-		b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false,isSite) //分块之后分包
+		b, _ := divisionPackageChild(&blockPackage, str, tn.Tag, false, false,isSite,codeSite) //分块之后分包
 		if b && len(blockPackage) > 0 {
 			tn.BPackage = true
 			for mk, mv := range blockPackage {
@@ -2314,13 +2314,13 @@ func initCheckMultiPackageByTable(tn *Table, key_index int, index []string, inde
 }
 
 //组装解析到的分包,//key如果匹配到抽取关键词就添加到table.SortKV
-func (tn *Table) assemblePackage(k1, v1, key string,isSite bool) {
+func (tn *Table) assemblePackage(k1, v1, key string,isSite bool,codeSite string) {
 	bp := tn.BlockPackage.Map[key].(*u.BlockPackage)
 	if bp.TableKV == nil {
 		bp.TableKV = u.NewJobKv()
 	}
 	if v1 != "" {
-		kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1,isSite) //匹配抽取关键词
+		kvTags, _ := CommonDataAnaly(k1, "中标情况", "", v1,isSite,codeSite) //匹配抽取关键词
 		for k3, v3 := range kvTags {
 			bp.TableKV.KvTags[k3] = append(bp.TableKV.KvTags[k3], v3...)
 		}
@@ -2477,7 +2477,7 @@ func replPkgConfusion(v1 string) string {
 }
 
 //对td中的值,进行再处理
-func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat,isSite bool) {
+func (tn *Table) TdContactFormat(contactFormat *u.ContactFormat,isSite bool,codeSite string) {
 	//处理表格中的联系人信息
 	indexMap := contactFormat.IndexMap
 	matchMap := contactFormat.MatchMap
@@ -2549,7 +2549,7 @@ L:
 			//和|以?及|与|、多个词和在一起
 			jumpNextTd, thisTrHasMatch = tn.tdsMultipleWords(jumpNextTd, td, td_index, tr, thisTrHasMatch, indexMap)
 			//分块之后的kv
-			thisTdKvs := kvAfterDivideBlock("", td.Text, 3, tn.TableResult.RuleBlock,isSite)
+			thisTdKvs := kvAfterDivideBlock("", td.Text, 3, tn.TableResult.RuleBlock,isSite,codeSite)
 			if len(thisTdKvs) == 0 {
 				thisTdKvs = tn.tdkv(td) //获取冒号kv
 			}
@@ -2577,7 +2577,7 @@ L:
 				//都为正序查询
 				if allAscFind && tdAscFind {
 					//都为正序查询处理
-					matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex,isSite)
+					matchCount, weightMap, matchMap, thisTrHasMatch, indexMap, iscontinue, reCreate, thidTdIndex = tn.asdFind(td_k, matchCount, weightMap, matchMap, td, thisTrHasMatch, td_kv, indexMap, iscontinue, reCreate, thidTdIndex,isSite,codeSite)
 				}
 				if iscontinue {
 					continue
@@ -2647,7 +2647,7 @@ L:
 					}
 					thisTrHasMatch = true
 					//modle
-					modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index,isSite)
+					modle(thisTdKvs, td, myContactType, td_k, td_v, &contactTypeTagMap, tn, &weightMap, tr_index, td_index,isSite,codeSite)
 				}
 			}
 			//u.Debug(td.SortKV.Map)
@@ -2675,7 +2675,7 @@ L:
 }
 
 //modle
-func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int,isSite bool) {
+func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactTypeTagMap *map[string]map[string][]interface{}, tn *Table, weightMap *map[string]map[string]interface{}, tr_index, td_index int,isSite bool,codeSite string) {
 	modle := 0
 	if len(thisTdKvs) == 1 {
 		if regReplAllSpace.ReplaceAllString(thisTdKvs[0].Value, "") == "" {
@@ -2690,7 +2690,7 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
 	} else {
 		//
 		if !strings.HasSuffix(td_k, "方式") {
-			kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts,isSite)
+			kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: myContactType + td_k, Value: td_v}}, "", BuyerContacts,isSite,codeSite)
 			if len(kvTags) == 1 {
 				tagVal, _ := u.FirstKeyValueInMap(kvTags)
 				if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {
@@ -2717,7 +2717,7 @@ func modle(thisTdKvs []*u.Kv, td *TD, myContactType, td_k, td_v string, contactT
 }
 
 //都为正序查询
-func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int,isSite bool) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
+func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[string]interface{}, matchMap map[string]map[string]bool, td *TD, thisTrHasMatch bool, td_kv *u.Kv, indexMap map[int]string, iscontinue bool, reCreate bool, thidTdIndex int,isSite bool,codeSite string) (int, map[string]map[string]interface{}, map[string]map[string]bool, bool, map[int]string, bool, bool, int) {
 	for _, k := range HasOrderContactType(td_k) { //采购单位,代理机构
 		if !ContactType[k].MatchString(td_k) { //没有匹配到采购单位,代理机构
 			continue
@@ -2728,9 +2728,9 @@ func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[s
 		}
 		//匹配到进行处理
 		if ContactInfoVagueReg.MatchString(td_k) {
-			thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch,isSite)
+			thisTrHasMatch = tn.matchContactType(&matchMap, k, td_k, td_kv.Value, td, &weightMap, thisTrHasMatch,isSite,codeSite)
 		} else if k == "采购单位" { //打标签,权重高的重新覆盖
-			kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"},isSite)
+			kvTags := GetKvTags([]*u.Kv{td_kv}, "", []string{"采购单位"},isSite,codeSite)
 			tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
 			if tagVal == k {
 				if weightMap[k][k] == nil || (weightVal != nil && weightVal.(int) >= weightMap[k][k].(int)) || len(matchMap[k]) == 0 {
@@ -2781,13 +2781,13 @@ func (tn *Table) asdFind(td_k string, matchCount int, weightMap map[string]map[s
 }
 
 //匹配到进行处理
-func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool,isSite bool) bool {
+func (tn *Table) matchContactType(matchMap *map[string]map[string]bool, k string, td_k string, td_v string, td *TD, weightMap *map[string]map[string]interface{}, thisTrHasMatch bool,isSite bool,codeSite string) bool {
 	if (*matchMap)[k] == nil {
 		(*matchMap)[k] = map[string]bool{}
 	}
 	isAddToMatchMap := true
 	if !strings.HasSuffix(td_k, "方式") {
-		kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts,isSite)
+		kvTags := GetKvTags([]*u.Kv{&u.Kv{Key: td_k, Value: td_v}}, "", BuyerContacts,isSite,codeSite)
 		if len(kvTags) == 1 {
 			tagVal, weightVal := u.FirstKeyValueInMap(kvTags)
 			if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(td_v) {

+ 19 - 19
src/jy/pretreated/colonkv.go

@@ -21,7 +21,7 @@ var (
 	regKV          = regexp.MustCompile("([\\p{Han}][^,,。、.;;\r\n]{1,30}?)[::](.*)")
 	filterK        = regexp.MustCompile("[((\\[【].*?[))\\]】]|<[^>].+?>|[①②③¥·;;‘“'’”,*<>((\\[【、))/\\]】??,。.\".\\s\u3000\u2003\u00a0]+|^[一二三四五六七八九十0-91234567890]+")
 	filterValue    = regexp.MustCompile("(^(无)$|.+%.*|^[\r\n\\s\u3000\u2003\u00a0]+$|^<.*>)")
-	regReplKey     = regexp.MustCompile("^(包(.+[A-Za-z\\d])?|本项目|推荐|的|本次)|([约为元万亿]+|[大小]写|人民币|[全]称|姓名)$")
+	regReplKey     = regexp.MustCompile("^(包(.+[A-Za-z\\d])?|本项目|推荐|的|本次)|([约为元万亿]+|[大小]写|人民币|[全]称|姓名)$")
 	buyerAndAgency = regexp.MustCompile("(代理(机构|人)|采购(人|单位))")
 	BlockTagMap    = map[string]bool{
 		"招标范围": true,
@@ -67,10 +67,10 @@ func (ce *ColonkvEntity) divisionMoreKV(con string) string {
 }
 
 //获取冒号kv入口
-func (ce *ColonkvEntity) entrance(con, title string, contactFormat *ContactFormat, from int,isSite bool) ([]*Kv, map[string]string) {
+func (ce *ColonkvEntity) entrance(con, title string, contactFormat *ContactFormat, from int,isSite bool,codeSite string) ([]*Kv, map[string]string) {
 	kvs := ce.GetKvs(con, title, from)
 	if from == 1 {
-		FormatContactKv(&kvs, title, nil, contactFormat,isSite)
+		FormatContactKv(&kvs, title, nil, contactFormat,isSite,codeSite)
 	}
 	kv := map[string]string{}
 	for _, v := range kvs {
@@ -163,14 +163,14 @@ func (ce *ColonkvEntity) getColonKv(con, title string, from int) []*Kv {
 }
 
 //冒号kv和空格kv结合
-func (ce *ColonkvEntity) getColonSpaceKV(con string,isSite bool) []*Kv {
+func (ce *ColonkvEntity) getColonSpaceKV(con string,isSite bool,codeSite string) []*Kv {
 	con = colonkvEntity.processText(con)
 	lines := SspacekvEntity.getLines(con)
 	kvMaps := []*Kv{}
 	for _, line := range lines {
 		kvs := colonkvEntity.getColonKv(line, "", 1)
 		if len(kvs) == 0 {
-			kv := SspacekvEntity.divideKV(line,isSite)
+			kv := SspacekvEntity.divideKV(line,isSite,codeSite)
 			if kv != nil {
 				kvMaps = append(kvMaps, kv...)
 			}
@@ -276,7 +276,7 @@ func IsContactKvHandle(value string, m map[string]bool) bool {
 
 //kv关于联系人信息的处理
 //采购人>集中采购机构
-func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *ContactFormat,isSite bool) {
+func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *ContactFormat,isSite bool,codeSite string) {
 	////////////////////////////
 	//处理联系人信息
 	var indexMap map[int]string
@@ -436,7 +436,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 						matchMap[ct_k] = map[string]bool{}
 					}
 					if !strings.HasSuffix(k, "方式") {
-						kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", BuyerContacts,isSite)
+						kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", BuyerContacts,isSite,codeSite)
 						if len(kvTags) == 1 {
 							tagVal, weightVal := FirstKeyValueInMap(kvTags)
 							if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(v) {
@@ -468,7 +468,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 					}
 				}
 				if ct_k == "采购单位" { //打标签,权重高的重新覆盖
-					kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", []string{"采购单位"},isSite)
+					kvTags := GetKvTags([]*Kv{&Kv{Key: k, Value: v}}, "", []string{"采购单位"},isSite,codeSite)
 					tagVal, weightVal := FirstKeyValueInMap(kvTags)
 					if tagVal == ct_k {
 						if weightMap[ct_k][ct_k] == nil || (weightVal != nil && weightVal.(int) > weightMap[ct_k][ct_k].(int)) {
@@ -595,7 +595,7 @@ func FormatContactKv(kvs *[]*Kv, title string, buyers []string, contactFormat *C
 		allMatchCount++
 		delete(totalIndexMap, myContactType)
 		if !strings.HasSuffix(k, "方式") {
-			kvTags := GetKvTags([]*Kv{&Kv{Key: myContactType + k, Value: v}}, "", BuyerContacts,isSite)
+			kvTags := GetKvTags([]*Kv{&Kv{Key: myContactType + k, Value: v}}, "", BuyerContacts,isSite,codeSite)
 			if len(kvTags) == 1 {
 				tagVal, _ := FirstKeyValueInMap(kvTags)
 				if tagVal == "采购单位联系人" && ContactBuyerPersonFilterReg.MatchString(v) {
@@ -707,20 +707,20 @@ func HasOrderContactType(text string) []string {
 
 //两种冒号kv结合到一起
 //from 1--全文 2--table td 3--table td解析采购单位联系人 4--分包
-func GetKVAll(content, title string, contactFormat *ContactFormat, from int,isSite bool) *JobKv {
+func GetKVAll(content, title string, contactFormat *ContactFormat, from int,isSite bool,codeSite string) *JobKv {
 	content = formatText(content, "kv")
-	m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from,isSite)
+	m1Kvs, _ := colonkvEntity.entrance(content, title, contactFormat, from,isSite,codeSite)
 	//	for _, kvs := range m1Kvs {
 	//		qutil.Debug(kvs.Key, kvs.Value)
 	//	}
-	kvTags := GetKvTags(m1Kvs, title, nil,isSite)
+	kvTags := GetKvTags(m1Kvs, title, nil,isSite,codeSite)
 	//	for k, kvs := range kvTags {
 	//		qutil.Debug("kkkkk--", k)
 	//		for _, kv := range kvs {
 	//			qutil.Debug(kv.Key, kv.Value)
 	//		}
 	//	}
-	m2Kvs, m2KvTags := GetKvFromtxt(content, title, from,isSite)
+	m2Kvs, m2KvTags := GetKvFromtxt(content, title, from,isSite,codeSite)
 	//	for k, kvs := range m2KvTags {
 	//		qutil.Debug("kkkkk--", k)
 	//		for _, kv := range kvs {
@@ -774,7 +774,7 @@ func PrintKvTags(kvTags map[string][]*Tag) {
 }
 
 //KVTags转kv
-func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool) map[string][]*Tag {
+func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool,codeSite string) map[string][]*Tag {
 	kvTags := map[string][]*Tag{}
 	if title != "" && BlockTagMap[title] {
 		kvTags[title] = append(kvTags[title], &Tag{title, title, 0, nil, false})
@@ -792,17 +792,17 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool) map[str
 		}
 		key = colonkvEntity.blockTitleKV(title, key)
 		//先用新的key
-		tags := GetAppointTags(key, tagdbs,isSite) //找标签库
+		tags := GetAppointTags(key, tagdbs,isSite,codeSite) //找标签库
 		if len(tags) == 0 && len(key) < 10 && len(title) > 0 && len(title) < 15 {
 			key = title + key
-			tags = GetAppointTags(key, tagdbs,isSite)
+			tags = GetAppointTags(key, tagdbs,isSite,codeSite)
 		}
 		//再用老的key
 		if len(tags) == 0 && k != key {
-			tags = GetAppointTags(k, tagdbs,isSite)
+			tags = GetAppointTags(k, tagdbs,isSite,codeSite)
 			if len(tags) == 0 && len(k) < 10 && len(title) > 0 && len(title) < 15 {
 				k = title + k
-				tags = GetAppointTags(k, tagdbs,isSite)
+				tags = GetAppointTags(k, tagdbs,isSite,codeSite)
 				if len(tags) > 0 {
 					key = k
 				}
@@ -824,7 +824,7 @@ func GetKvTags(findkvs []*Kv, title string, tagdbs []string,isSite bool) map[str
 							if strings.TrimSpace(nextval) == "" {
 								continue
 							}
-							if GetAppointTags(nextval, tagdbs,isSite).Len() > 0 || GetAppointTags(k, tagdbs,isSite).Len() > 0 {
+							if GetAppointTags(nextval, tagdbs,isSite,codeSite).Len() > 0 || GetAppointTags(k, tagdbs,isSite,codeSite).Len() > 0 {
 								continue
 							}
 						}

+ 17 - 17
src/jy/pretreated/division.go

@@ -87,7 +87,7 @@ var (
 )
 
 //分块
-func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock,isSite bool) ([]*util.Block, int) {
+func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock,isSite bool,codeSite string) ([]*util.Block, int) {
 	defer qutil.Catch()
 	returnValue := 0
 	var blocks []*util.Block
@@ -299,8 +299,8 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock,isSite
 	for _, bl := range returnBlocks {
 		//解析kv
 		newText := TextAfterRemoveTable(bl.Text)
-		bl.ColonKV = GetKVAll(newText, bl.Title, contactFormat, from,isSite)
-		bl.SpaceKV = SspacekvEntity.Entrance(newText, bl.Title, contactFormat,isSite)
+		bl.ColonKV = GetKVAll(newText, bl.Title, contactFormat, from,isSite,codeSite)
+		bl.SpaceKV = SspacekvEntity.Entrance(newText, bl.Title, contactFormat,isSite,codeSite)
 		//正则抽取的时候有时需要匹配换行或者句号,这里在解析完kv之后,在块结尾添加换行和句号
 		bl.Text = appendWarpStop(bl.Text)
 	}
@@ -543,7 +543,7 @@ func filterTitle(title string) string {
 }
 
 //从块里面找分包
-func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool) (blockPackage map[string]*util.BlockPackage) {
+func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool,codeSite string) (blockPackage map[string]*util.BlockPackage) {
 	blockPackage = map[string]*util.BlockPackage{}
 	//块分包
 	for _, v := range *blocks {
@@ -554,7 +554,7 @@ func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool) (blockPackage map[
 		}
 		//var ok bool
 		//var surplusText string
-			divisionPackageChild(&blockPackage, text, v.Title, true, v.Tag["中标单位"],isSite)
+			divisionPackageChild(&blockPackage, text, v.Title, true, v.Tag["中标单位"],isSite,codeSite)
 		////把分包内容摘除掉有问题 有的项目名称中包含二标段
 		//if ok && false {
 		//	v.Text = surplusText
@@ -566,15 +566,15 @@ func FindPackageFromBlocks(blocks *[]*util.Block,isSite bool) (blockPackage map[
 }
 
 // FindPackageFromText searches the body text for sub-packages (分包): it starts from an empty map, lets divisionPackageChild fill it from content/title, and returns that map.
-func FindPackageFromText(title string, content string,isSite bool) (blockPackage map[string]*util.BlockPackage) {
+func FindPackageFromText(title string, content string,isSite bool,codeSite string) (blockPackage map[string]*util.BlockPackage) {
 	blockPackage = map[string]*util.BlockPackage{}
 	// Look for sub-packages in the body text; the literals are isFindWinnerOrder=true and accuracy=false, and both return values of divisionPackageChild are discarded.
-	divisionPackageChild(&blockPackage, content, title, true, false,isSite)
+	divisionPackageChild(&blockPackage, content, title, true, false,isSite,codeSite)
 	return
 }
 
 //分块之后分包
-func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool,isSite bool) (bool, string) {
+func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content, title string, isFindWinnerOrder, accuracy bool,isSite bool,codeSite string) (bool, string) {
 	//查找知否有分包
 	content = regMoreWrap.ReplaceAllString(content, "\n")
 	content = regEndWrap.ReplaceAllString(content, "")
@@ -708,14 +708,14 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 				//合并文本
 				(*blockPackage)[index].Text += "\n" + text
 				//合并冒号kv
-				colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1,isSite)
+				colonJobKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 1,isSite,codeSite)
 				if headKey != "" {
-					kvAgain := GetKVAll(text, "", nil, 4,isSite)
+					kvAgain := GetKVAll(text, "", nil, 4,isSite,codeSite)
 					MergeKvTags(colonJobKv.KvTags, kvAgain.KvTags)
 				}
 				MergeKvTags((*blockPackage)[index].ColonKV.KvTags, colonJobKv.KvTags)
 				//合并空格kv
-				spaceJobKv := SspacekvEntity.Entrance(text, "", nil,isSite)
+				spaceJobKv := SspacekvEntity.Entrance(text, "", nil,isSite,codeSite)
 				MergeKvTags((*blockPackage)[index].SpaceKV.KvTags, spaceJobKv.KvTags)
 			} else {
 				newBpkg := &util.BlockPackage{
@@ -725,13 +725,13 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 					Type:     bv[1],
 					Accuracy: accuracy,
 				}
-				finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4,isSite)
+				finalKv := GetKVAll(strings.TrimLeft(text, headKey), "", nil, 4,isSite,codeSite)
 				if headKey != "" {
-					kvAgain := GetKVAll(text, "", nil, 4,isSite)
+					kvAgain := GetKVAll(text, "", nil, 4,isSite,codeSite)
 					MergeKvTags(finalKv.KvTags, kvAgain.KvTags)
 				}
 				newBpkg.ColonKV = finalKv
-				newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil,isSite)
+				newBpkg.SpaceKV = SspacekvEntity.Entrance(text, "", nil,isSite,codeSite)
 				(*blockPackage)[index] = newBpkg
 			}
 		}
@@ -739,7 +739,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 	//中标人排序
 	if isFindWinnerOrder && blockPackage != nil && len(*blockPackage) > 0 {
 		for _, v := range *blockPackage {
-			v.WinnerOrder = winnerOrderEntity.Find(v.Text, true, 2,isSite)
+			v.WinnerOrder = winnerOrderEntity.Find(v.Text, true, 2,isSite,codeSite)
 		}
 	}
 	return true, surplusText
@@ -852,8 +852,8 @@ func interceptText(indexs []int, indexPkgMap map[int]string, pkgIndexMap map[str
 }
 
 //分块之后的kv
-func kvAfterDivideBlock(tp, text string, from int, ruleBlock *util.RuleBlock,isSite bool) []*util.Kv {
-	blocks, _ := DivideBlock(tp, text, from, ruleBlock,isSite)
+func kvAfterDivideBlock(tp, text string, from int, ruleBlock *util.RuleBlock,isSite bool,codeSite string) []*util.Kv {
+	blocks, _ := DivideBlock(tp, text, from, ruleBlock,isSite,codeSite)
 	kvs := []*util.Kv{}
 	for _, v := range blocks {
 		//util.Debug(v.Text)

+ 6 - 6
src/jy/pretreated/spacekv.go

@@ -16,18 +16,18 @@ var (
 	excludeSpaceKey = regexp.MustCompile("[.、�\\[【{{〔<《\\]】}}〕>》]")
 )
 
-func (se *SpacekvEntity) Entrance(text, title string, contactFormat *util.ContactFormat,isSite bool) *util.JobKv {
+func (se *SpacekvEntity) Entrance(text, title string, contactFormat *util.ContactFormat,isSite bool,codeSite string) *util.JobKv {
 	lines := se.getLines(text)
 	kvMaps := []*util.Kv{}
 	for _, line := range lines {
-		kvMap := se.divideKV(line,isSite)
+		kvMap := se.divideKV(line,isSite,codeSite)
 		if kvMap == nil {
 			continue
 		}
 		kvMaps = append(kvMaps, kvMap...)
 	}
-	FormatContactKv(&kvMaps, title, nil, contactFormat,isSite)
-	kvTags := GetKvTags(kvMaps, title, nil,isSite)
+	FormatContactKv(&kvMaps, title, nil, contactFormat,isSite,codeSite)
+	kvTags := GetKvTags(kvMaps, title, nil,isSite,codeSite)
 	return &util.JobKv{
 		Kvs:    kvMaps,
 		KvTags: kvTags,
@@ -35,7 +35,7 @@ func (se *SpacekvEntity) Entrance(text, title string, contactFormat *util.Contac
 }
 
 //空格分kv
-func (se *SpacekvEntity) divideKV(line string,isSite bool) []*util.Kv {
+func (se *SpacekvEntity) divideKV(line string,isSite bool,codeSite string) []*util.Kv {
 	line = strings.TrimSpace(line)
 	line = regReplAllSpace.ReplaceAllString(line, " ")
 	line = TimeHM.ReplaceAllString(line, "D$1H$2M")
@@ -56,7 +56,7 @@ func (se *SpacekvEntity) divideKV(line string,isSite bool) []*util.Kv {
 			continue
 		}
 		//value为key值跳过
-		if util.GetTags(v,isSite).Len() > 0 && util.GetTags(k,isSite).Len() > 0{
+		if util.GetTags(v,isSite,codeSite).Len() > 0 && util.GetTags(k,isSite,codeSite).Len() > 0{
 			continue
 		}
 		kvs = append(kvs, &util.Kv{Key: k, Value: v})

+ 12 - 12
src/jy/pretreated/tablev2.go

@@ -87,7 +87,7 @@ var submatchreg = regexp.MustCompile(`((?:[一二三四五六七八九十0-10]+[
 var BHKey = regexp.MustCompile(`^[^,,;:。、.]{2,8}.{0,3}[::].+$`)
 var dwReg = regexp.MustCompile("单位[::/ \\s\u3000\u2003\u00a0\\n]*([万亿元]+)")
 
-func NewTD(Goquery *goquery.Selection, tr *TR, table *Table,isSite bool) *TD {
+func NewTD(Goquery *goquery.Selection, tr *TR, table *Table,isSite bool,codeSite string) *TD {
 	defer qutil.Catch()
 	td := &TD{
 		ArrVal:  []string{},
@@ -121,7 +121,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table,isSite bool) *TD {
 		//qutil.Debug("有子表格")
 		//格式化正文
 		txt = TextAfterRemoveTable(td.Html)
-		td.tdHasTable(&bsontable, tr,isSite) //处理td中的table,块标签处理,子表解析集处理
+		td.tdHasTable(&bsontable, tr,isSite,codeSite) //处理td中的table,块标签处理,子表解析集处理
 	} else {
 		txt = strings.TrimSpace(td.Goquery.Text())
 	}
@@ -130,7 +130,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table,isSite bool) *TD {
 	td.Text = txt //原始串
 	//处理table外内容
 	var ub []*u.Block
-	ub, _ = DivideBlock("", txt, 2, table.TableResult.RuleBlock,isSite)
+	ub, _ = DivideBlock("", txt, 2, table.TableResult.RuleBlock,isSite,codeSite)
 	//看是否划块
 	if len(ub) > 0 {
 		for _, bl := range ub {
@@ -175,7 +175,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table,isSite bool) *TD {
 		}
 	}
 	//对td单元格值判断是否是表头和根据td内容长度进行分块处理
-	td.tdIsHb(tr, table, bsontable,isSite)
+	td.tdIsHb(tr, table, bsontable,isSite,codeSite)
 	bhead := false
 	if td.TR.RowPos == 0 { //第一行
 		if td.Goquery.Closest("thead").Size() == 1 && !bsontable { //如果是thead确定为k值表头
@@ -192,7 +192,7 @@ func NewTD(Goquery *goquery.Selection, tr *TR, table *Table,isSite bool) *TD {
 }
 
 //处理td中的table,块标签处理,子表解析集处理
-func (td *TD) tdHasTable(bsontable *bool, tr *TR,isSite bool) {
+func (td *TD) tdHasTable(bsontable *bool, tr *TR,isSite bool,codeSite string) {
 	ts := td.TR.Table.TableResult
 	tabs, _ := ComputeConRatio(td.Html, 2) //计算表格占比
 	if len(tabs) > 0 {
@@ -225,7 +225,7 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR,isSite bool) {
 			}
 			sonts := NewTableResult(ts.Id, ts.Toptype, stag, td.Html, 2, td.TR.Table.TableResult.RuleBlock)
 			sonts.GoqueryTabs = tv
-			sonts.Analy(isSite)
+			sonts.Analy(isSite,codeSite)
 
 			//sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock) //又一次调用解析表格入口
 			td.BH = false
@@ -300,7 +300,7 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR,isSite bool) {
 }
 
 //对td单元格值判断是否是表头和根据td内容长度进行分块处理
-func (td *TD) tdIsHb(tr *TR, table *Table, bsontable,isSite bool) {
+func (td *TD) tdIsHb(tr *TR, table *Table, bsontable,isSite bool,codeSite string) {
 	lenval := len([]rune(td.Val)) //经过处理的td内容长度
 	//if lentxt > 9 {
 	//td.KV = GetKVAll(txt, "")
@@ -308,7 +308,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable,isSite bool) {
 	//经过处理的td内容长度大于50,划块,分包
 	if lenval > 50 { //看是否划块
 		//u.Debug(txt)
-		ub, _ = DivideBlock("", td.Text, 2, table.TableResult.RuleBlock,isSite) //对td的原始值
+		ub, _ = DivideBlock("", td.Text, 2, table.TableResult.RuleBlock,isSite,codeSite) //对td的原始值
 		//看是否划块
 		if len(ub) > 0 {
 			for _, bl := range ub {
@@ -341,9 +341,9 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable,isSite bool) {
 		}
 		if isFindPkg {
 			if len(ub) > 0 {
-				blockPackage = FindPackageFromBlocks(&ub,isSite) //从块里面找分包
+				blockPackage = FindPackageFromBlocks(&ub,isSite,codeSite) //从块里面找分包
 			} else {
-				blockPackage = FindPackageFromText("", td.Val,isSite) //从正文里面找分包
+				blockPackage = FindPackageFromText("", td.Val,isSite,codeSite) //从正文里面找分包
 			}
 		}
 		if len(blockPackage) > 0 {
@@ -384,7 +384,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable,isSite bool) {
 			td.SortKV.AddKey(strings.TrimSpace(td.Text[:tagindex]), strings.TrimSpace(td.Text[tagindex:])) //存放kv值
 			td.BH = true
 		}
-		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 3,isSite) //td冒号kv
+		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 3,isSite,codeSite) //td冒号kv
 		for k, v := range resm {
 			if k != "" && v != "" {
 				td.SortKV.AddKey(k, v) //存放kv值
@@ -445,7 +445,7 @@ func (td *TD) tdIsHb(tr *TR, table *Table, bsontable,isSite bool) {
 		if len(td.TR.TDs) > 0 {
 			kvTitle = td.TR.TDs[len(td.TR.TDs)-1].Val
 		}
-		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 2,isSite) //获取冒号kv入口
+		_, resm := colonkvEntity.entrance(td.Val, kvTitle, nil, 2,isSite,codeSite) //获取冒号kv入口
 		for k, v := range resm {
 			td.SortKV.AddKey(k, v)
 		}

+ 8 - 8
src/jy/pretreated/winnerorder.go

@@ -51,7 +51,7 @@ var (
  *text文本,flag非否精确查找
  *from 来源
  */
-func (wo *WinnerOrderEntity) Find(text string, flag bool, from int,isSite bool) []map[string]interface{} {
+func (wo *WinnerOrderEntity) Find(text string, flag bool, from int,isSite bool,codeSite string) []map[string]interface{} {
 	text = winnerReg5.ReplaceAllString(text, "\n$3:$1\n")
 	/*
 		"_id" : ObjectId("5c2c6f60a5cb26b9b7b62cd8")
@@ -70,13 +70,13 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int,isSite bool)
 	if len(blocks) == 0 {
 		blocks = append(blocks, text)
 	}
-	winners := wo.findByReg(text, blocks, winnerReg1, from,isSite)
+	winners := wo.findByReg(text, blocks, winnerReg1, from,isSite,codeSite)
 	if len(winners) == 0 {
-		winners = wo.findByReg(text, blocks, winnerReg2, from,isSite)
+		winners = wo.findByReg(text, blocks, winnerReg2, from,isSite,codeSite)
 	}
 	if len(winners) == 0 {
 		if flag {
-			winners = wo.findByReg(text, blocks, winnerReg3, from,isSite)
+			winners = wo.findByReg(text, blocks, winnerReg3, from,isSite,codeSite)
 		} else {
 			indexs_4 := winnerReg4.Split(text, -1)
 			if len(indexs_4) > 1 {
@@ -87,7 +87,7 @@ func (wo *WinnerOrderEntity) Find(text string, flag bool, from int,isSite bool)
 					}
 					for _, v_3 := range indexs_3 {
 						if strings.Count(v_4[:v_3[1]], "\n") <= 3 {
-							winners = wo.findByReg(text, blocks, winnerReg3, from,isSite)
+							winners = wo.findByReg(text, blocks, winnerReg3, from,isSite,codeSite)
 							break
 						}
 					}
@@ -206,7 +206,7 @@ func (wo *WinnerOrderEntity) getText(text string, blocks []string, reg_2 *regexp
 }
 
 //抽取对应的排序结果
-func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *regexp.Regexp, from int,isSite bool) []map[string]interface{} {
+func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *regexp.Regexp, from int,isSite bool,codeSite string) []map[string]interface{} {
 	text := wo.getText(content, blocks, reg_2, from)
 	winners := []map[string]interface{}{}
 	if len(text) < 1 {
@@ -215,7 +215,7 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 	for i, v := range text {
 		object := map[string]interface{}{}
 		count := 0
-		kvs := colonkvEntity.getColonSpaceKV(v,isSite)
+		kvs := colonkvEntity.getColonSpaceKV(v,isSite,codeSite)
 		for _, kv := range kvs {
 			k, v := kv.Key, kv.Value
 			if regDivision.MatchString(v) {
@@ -242,7 +242,7 @@ func (wo *WinnerOrderEntity) findByReg(content string, blocks []string, reg_2 *r
 				if offerReg.MatchString(k) {
 					findOfferFlag = true
 				} else {
-					kvTags := GetKvTags([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"},isSite)
+					kvTags := GetKvTags([]*util.Kv{&util.Kv{Key: k, Value: v}}, "", []string{"中标金额"},isSite,codeSite)
 					if len(kvTags["中标金额"]) > 0 {
 						findOfferFlag = true
 					}

+ 20 - 14
src/jy/util/tagmatch.go

@@ -127,12 +127,12 @@ func LoadTagDb(respath string) {
 }
 
 // GetTags returns the tags matched for src; it delegates to GetAppointTags with nil as the array argument and passes its remaining arguments through unchanged.
-func GetTags(src string,isSite bool) Tags {
-	return GetAppointTags(src, nil,isSite)
+func GetTags(src string,isSite bool,codeSite string) Tags {
+	return GetAppointTags(src, nil,isSite,codeSite)
 }
 
 //根据指定的标签库取得匹配
-func GetAppointTags(src string, array []string,isSite bool) Tags {
+func GetAppointTags(src string, array []string,isSite bool,codeSite string) Tags {
 	src = TrimLRAll(src, "")
 	ret := make(Tags, 0)
 	m := map[string]bool{}
@@ -151,18 +151,24 @@ func GetAppointTags(src string, array []string,isSite bool) Tags {
 	//	}
 	//}
 	//lock.Unlock()
-	if isSite{
-		SiteTagdbTable.Range(func(key, value interface{}) bool {
-			if len(m) > 0 && !m[fmt.Sprint(key)] {
-				return true
-			}
-			if v,ok := value.(*TagFile);ok {
-				if ok, tag := v.Match(src); ok {
-					ret = append(ret, &Tag{src, v.Name, tag.Weight, tag.TagReg, false})
+	if isSite {
+		value, ok := SiteTagdbTable.Load(codeSite)
+		if !ok{
+			return ret
+		}
+		if vvv,ok2 := value.(sync.Map);ok2{
+			vvv.Range(func(key, value interface{}) bool {
+				if len(m) > 0 && !m[fmt.Sprint(key)] {
+					return true
 				}
-			}
-			return true
-		})
+				if v,ok := value.(*TagFile);ok {
+					if ok, tag := v.Match(src); ok {
+						ret = append(ret, &Tag{src, v.Name, tag.Weight, tag.TagReg, false})
+					}
+				}
+				return true
+			})
+		}
 	}else {
 		TagdbTable.Range(func(key, value interface{}) bool {
 			if len(m) > 0 && !m[fmt.Sprint(key)] {