fengweiqiang hai 5 anos
pai
achega
36eb42c24c

+ 5 - 5
src/jy/admin/rulecheck.go

@@ -307,7 +307,7 @@ func checkCoreReg(field, content, ruleText string) map[string]string {
 //lua脚本前置过滤验证
 func checkPreScript(code, name, infoid, script string) map[string]interface{} {
 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j, _,_,_ := extract.PreInfo(*doc)
+	j, _,_ := extract.PreInfo(*doc)
 	delete(*j.Data, "contenthtml")
 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
 	lua.Block = j.Block
@@ -331,7 +331,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 	e.InitTag(false)
 	e.InitTag(true)
 	tmp, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j, _,_,_ := extract.PreInfo(*tmp)
+	j, _,_ := extract.PreInfo(*tmp)
 	doc := *j.Data
 	//全局前置规则,结果覆盖doc属性
 	for _, v := range e.RulePres {
@@ -351,7 +351,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 					tmp = extract.ExtRegPre(tmp, j, v, e.TaskInfo)
 				}
 				//抽取-规则
-				extract.ExtRuleCore(tmp, e, vc, j)
+				extract.ExtRuleCore(tmp, e, vc, j,false)
 			}
 		}
 	} else {
@@ -367,7 +367,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 					tmp = extract.ExtRegPre(tmp, j, v, e.TaskInfo)
 				}
 				//抽取-规则
-				extract.ExtRuleCore(tmp, e, vc, j)
+				extract.ExtRuleCore(tmp, e, vc, j,false)
 			}
 		}
 	}
@@ -394,7 +394,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 //lua脚本抽取验证
 func checkCoreScript(code, name, infoid, script string) interface{} {
 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j, _,_,_ := extract.PreInfo(*doc)
+	j, _,_ := extract.PreInfo(*doc)
 	delete(*j.Data, "contenthtml")
 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
 	lua.Block = j.Block

+ 14 - 0
src/jy/admin/site_management.go

@@ -108,6 +108,20 @@ func init() {
 		b := Mgo.UpdateById("site_management", _id, `{"$set":{"isuse":`+isuse+`}}`)
 		c.JSON(200, gin.H{"rep": b})
 	})
+	// Toggle whether a site's results are merged with the generic extraction.
+	Admin.POST("/site_management/usemerge", func(c *gin.Context) {
+		_id, _ := c.GetPostForm("_id")
+		ismerge, _ := c.GetPostForm("ismerge")
+		// ismerge is spliced verbatim into the update document below, so only
+		// the two JSON boolean literals may pass; anything else could inject
+		// extra operators or produce invalid JSON.
+		if ismerge != "true" && ismerge != "false" {
+			c.JSON(400, gin.H{"rep": false})
+			return
+		}
+		b := Mgo.UpdateById("site_management", _id, `{"$set":{"ismerge":`+ismerge+`}}`)
+		c.JSON(200, gin.H{"rep": b})
+	})
 	//属性
 	Admin.GET("/site_management/info", func(c *gin.Context) {
 		vid := c.Query("vid")

+ 4 - 5
src/jy/extract/exportask.go

@@ -51,7 +51,7 @@ func extractAndExport(v string, t map[string]interface{}) {
 		ProcessPool: make(chan bool, 5),
 	}
 	e.TaskInfo.FDB = db.MgoFactory(1, 3, 120, fmt.Sprint(t["dbaddr"]), fmt.Sprint(t["dbname"]))
-	ext.InitSite()
+	e.InitSite()
 	e.InitRulePres()
 	e.InitRuleBacks(false)
 	e.InitRuleBacks(true)
@@ -78,15 +78,14 @@ func extractAndExport(v string, t map[string]interface{}) {
 		}
 		var j, jf *ju.Job
 		var isSite bool
-		var codeSite string
 		if e.IsFileField && v["projectinfo"] != nil {
 			v["isextFile"] = true
-			j, jf,isSite,codeSite = e.PreInfo(v)
+			j, jf,isSite= e.PreInfo(v)
 		} else {
-			j, _,isSite,codeSite = e.PreInfo(v)
+			j, _,isSite = e.PreInfo(v)
 		}
+		go e.ExtractProcess(j, jf,isSite)
 		e.TaskInfo.ProcessPool <- true
-		go e.ExtractProcess(j, jf,isSite,codeSite)
 	}
 }
 

+ 197 - 125
src/jy/extract/extract.go

@@ -35,8 +35,6 @@ var (
 	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
-var Luacodes = sync.Map{}
-var SiteManages = sync.Map{}
 
 //启动测试抽取
 func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bool {
@@ -97,15 +95,14 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 			}
 			var j, jf *ju.Job
 			var isSite bool
-			var codeSite string
 			if ext.IsFileField && v["projectinfo"] != nil {
 				v["isextFile"] = true
-				j, jf, isSite,codeSite = ext.PreInfo(v)
+				j, jf, isSite = ext.PreInfo(v)
 			} else {
-				j, _, isSite,codeSite = ext.PreInfo(v)
+				j, _, isSite = ext.PreInfo(v)
 			}
+			go ext.ExtractProcess(j, jf, isSite)
 			ext.TaskInfo.ProcessPool <- true
-			go ext.ExtractProcess(j, jf, isSite,codeSite)
 		}
 		return true
 	} else {
@@ -197,8 +194,8 @@ func RunExtractTask(taskId string) {
 	fmt.Printf("count=%d,pageNum=%d,query=%v", count, pageNum, query)
 	for i := 0; i < pageNum; i++ {
 		query = bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}}
-		fmt.Printf("page=%d,query=%v", i+1, query)
 		list, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, 0, limit)
+		fmt.Printf("page=%d,query=%v,count=%d", i+1, query, len(*list))
 		for _, v := range *list {
 			if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
 				continue
@@ -215,16 +212,15 @@ func RunExtractTask(taskId string) {
 			}
 			var j, jf *ju.Job
 			var isSite bool
-			var codeSite string
 			if ext.IsFileField && v["projectinfo"] != nil {
 				v["isextFile"] = true
-				j, jf, isSite,codeSite = ext.PreInfo(v)
+				j, jf, isSite = ext.PreInfo(v)
 			} else {
-				j, _, isSite,codeSite = ext.PreInfo(v)
+				j, _, isSite = ext.PreInfo(v)
 			}
-			ext.TaskInfo.ProcessPool <- true
-			go ext.ExtractProcess(j, jf, isSite,codeSite)
+			go ext.ExtractProcess(j, jf, isSite)
 			ext.TaskInfo.LastExtId = _id
+			ext.TaskInfo.ProcessPool <- true
 		}
 		db.Mgo.UpdateById("task", ext.Id, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`)
 		if !ext.IsRun {
@@ -236,12 +232,12 @@ func RunExtractTask(taskId string) {
 }
 
 //信息预处理-不和版本关联,取最新版本的配置项
-func PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool,codeSite string) {
-	return (&ExtractTask{}).PreInfo(doc)
+func PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool) {
+	return (&ExtractTask{Luacodes: &sync.Map{}, SiteMerge: &sync.Map{}}).PreInfo(doc) // empty maps: the method calls e.Luacodes.Load, which panics on a nil *sync.Map
 }
 
 //信息预处理-和版本关联
-func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool,codeSite string) {
+func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite bool) {
 	defer qu.Catch()
 	//判断是否有附件这个字段
 	var isextFile bool
@@ -258,6 +254,8 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 	}
 	detail = regexp.MustCompile(`<!--[\w\W]*?-->`).ReplaceAllString(detail, "")
 	d3, _ := doc["summary"].(string)
+	//全文的需要修复表格
+	detail = pretreated.RepairCon(detail)
 	detail = ju.CutLableStr(d3 + "\n" + detail)
 	detail = cut.ClearHtml(d3 + "\n" + detail)
 	doc["detail"] = detail
@@ -326,31 +324,31 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		}
 	}
 	//是否配置站点
-	codeSite = qu.ObjToString(doc["spidercode"])
-	exp, isSite := Luacodes.Load(codeSite)
-	if isSite{
-		if exp.( map[string]interface{})["e.SiteClearFn"]!= nil{
-			e.SiteClearFn = exp.( map[string]interface{})["e.SiteClearFn"].( map[string][]string)
+	codeSite := j.SpiderCode
+	exp, isSite := e.Luacodes.Load(codeSite)
+	if isSite {
+		if exp.(map[string]interface{})["e.SiteClearFn"] != nil {
+			e.SiteClearFn = exp.(map[string]interface{})["e.SiteClearFn"].(map[string][]string)
 		}
-		if exp.( map[string]interface{})["e.SiteTag"]!= nil{
-			e.SiteTag = exp.( map[string]interface{})["e.SiteTag"].( map[string][]*Tag)
+		if exp.(map[string]interface{})["e.SiteTag"] != nil {
+			e.SiteTag = exp.(map[string]interface{})["e.SiteTag"].(map[string][]*Tag)
 		}
-		if  exp.( map[string]interface{})["e.SiteRuleCores"] != nil{
-			e.SiteRuleCores = exp.( map[string]interface{})["e.SiteRuleCores"].(  map[string]map[string][]*RuleCore)
+		if exp.(map[string]interface{})["e.SiteRuleCores"] != nil {
+			e.SiteRuleCores = exp.(map[string]interface{})["e.SiteRuleCores"].(map[string]map[string][]*RuleCore)
 		}
-		if  exp.( map[string]interface{})["e.SiteRuleBacks"]!= nil{
-			e.SiteRuleBacks = exp.( map[string]interface{})["e.SiteRuleBacks"].(  []*RegLuaInfo)
+		if exp.(map[string]interface{})["e.SiteRuleBacks"] != nil {
+			e.SiteRuleBacks = exp.(map[string]interface{})["e.SiteRuleBacks"].([]*RegLuaInfo)
 		}
 	}
 	qu.Try(func() {
-		pretreated.AnalyStart(j, isSite,codeSite) //job.Block分块
+		pretreated.AnalyStart(j, isSite, codeSite) //job.Block分块
 		if isextFile {
-			pretreated.AnalyStart(jf, isSite,codeSite)
+			pretreated.AnalyStart(jf, isSite, codeSite)
 		}
 	}, func(err interface{}) {
 		log.Debug("pretreated.AnalyStart", err, j.SourceMid)
 	})
-	return j, jf, isSite,codeSite
+	return j, jf, isSite
 }
 
 //遍历附件字段内容,拼接在一起;附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
@@ -386,17 +384,62 @@ func file2text(doc *map[string]interface{}) {
 }
 
 //抽取
-func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool,codeSite string) {
-	e.ExtractDetail(j, isSite,codeSite)
+func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
+	e.ExtractDetail(j, isSite, j.SpiderCode)
 	if jf != nil && jf.IsFile {
-		e.ExtractFile(jf, isSite,codeSite)
+		e.ExtractFile(jf, isSite, j.SpiderCode)
+	}
+	if isSite {
+		ismerge, ok := e.SiteMerge.Load(j.SpiderCode)
+		if ok && ismerge.(bool) {
+			tmpj := &ju.Job{
+				SourceMid:      j.SourceMid,
+				Category:       j.Category,
+				CategorySecond: j.CategorySecond,
+				Content:        j.Content,
+				SpiderCode:     j.SpiderCode,
+				//Domain:     qu.ObjToString(doc["domain"]),
+				//Href:       qu.ObjToString(doc["href"]),
+				Title:     j.Title,
+				Data:      j.Data,
+				City:      j.City,
+				Province:  j.Province,
+				Jsondata:  j.Jsondata,
+				Result:    map[string][]*ju.ExtField{},
+				BuyerAddr: j.BuyerAddr,
+				RuleBlock: e.RuleBlock,
+			}
+			qu.Try(func() {
+				pretreated.AnalyStart(tmpj, false, "") //job.Block分块
+			}, func(err interface{}) {
+				log.Debug("pretreated.AnalyStart.ExtractProcess", err, j.SourceMid)
+			})
+			e.ExtractDetail(tmpj, false, "")
+			//if jf != nil && jf.IsFile {
+			//	e.ExtractFile(jf, false, "")
+			//}
+			//合并数据
+			j.Block = append(j.Block, tmpj.Block...)
+			j.Winnerorder = append(j.Winnerorder, tmpj.Winnerorder...)
+			for tmpk,_:= range j.Result{
+				if len(tmpj.Result[tmpk]) >0 {
+					j.Result[tmpk] = append(j.Result[tmpk], tmpj.Result[tmpk]...)
+				}
+			}
+			for tmpk ,_ :=range tmpj.Result{
+				if len(j.Result[tmpk]) == 0{
+					j.Result[tmpk] = append(j.Result[tmpk], tmpj.Result[tmpk]...)
+				}
+			}
+		}
 	}
+
 	//分析抽取结果并保存 todo
 	AnalysisSaveResult(j, jf, e)
 	<-e.TaskInfo.ProcessPool
 }
 
-func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool,codeSite string) {
+func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 	qu.Try(func() {
 		doc := *j.Data
 		//全局前置规则,结果覆盖doc属性
@@ -453,7 +496,7 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool,codeSite string) {
 				// log.Debug("抽取-前置规则", tmp)
 
 				//抽取-规则
-				ExtRuleCore(tmp, e, vc, j)
+				ExtRuleCore(tmp, e, vc, j, isSite)
 				// log.Debug("抽取-规则", tmp)
 
 				//抽取-后置规则
@@ -474,6 +517,9 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool,codeSite string) {
 						}
 						if isextitle { //标题加入选举
 							field := &ju.ExtField{Field: vc.Field, Code: vc.Id + "_title", RuleText: "title", Type: "title", MatchType: "title", ExtFrom: vc.ExtFrom, SourceValue: j.Title, Value: j.Title}
+							if isSite{
+								field.Score = 1
+							}
 							j.Result[vc.Field] = append(j.Result[vc.Field], field)
 						}
 					}
@@ -502,11 +548,13 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool,codeSite string) {
 				var cfn = []string{}
 				if isSite {
 					cfn = e.SiteClearFn[key]
-
 				} else {
 					cfn = e.ClearFn[key]
 				}
 				lockclear.Unlock()
+				if len(cfn) == 0 {
+					continue
+				}
 				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
 				before, _ := v.Value.(string)
 				v.Value = data[0]
@@ -524,14 +572,14 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool,codeSite string) {
 				lockclear.Unlock()
 			}
 		}
-		PackageDetail(j, e, isSite,codeSite) //处理分包信息
+		PackageDetail(j, e, isSite, codeSite) //处理分包信息
 		//		bs, _ := json.Marshal(j.Result)
 		//		 log.Debug("抽取结果", j.Title, j.SourceMid, string(bs))
 	}, func(err interface{}) {
 		log.Debug("ExtractProcess err", err)
 	})
 }
-func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool,codeSite string) {
+func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool, codeSite string) {
 	qu.Try(func() {
 		doc := *j.Data
 		//全局前置规则,结果覆盖doc属性
@@ -570,7 +618,7 @@ func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool,codeSite string) {
 
 				//抽取-规则
 				if value, ok := e.FileFields.Load(vc.Field); ok && qu.IntAllDef(value, 1) > 0 {
-					ExtRuleCore(tmp, e, vc, j)
+					ExtRuleCore(tmp, e, vc, j, isSite)
 				}
 				// log.Debug("抽取-规则", tmp)
 
@@ -610,7 +658,7 @@ func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool,codeSite string) {
 			}
 		}
 
-		PackageDetail(j, e, isSite,codeSite) //处理分包信息
+		PackageDetail(j, e, isSite, codeSite) //处理分包信息
 		//		bs, _ := json.Marshal(j.Result)
 		//		 log.Debug("抽取结果", j.Title, j.SourceMid, string(bs))
 	}, func(err interface{}) {
@@ -649,7 +697,7 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
 }
 
 //抽取-规则
-func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju.Job) {
+func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju.Job, isSite bool) {
 	//候选人加入
 	var kvMap map[string][]map[string]interface{}
 	extByReg := true
@@ -658,9 +706,9 @@ func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju
 	}
 	for _, v := range vc.RuleCores {
 		if v.IsLua {
-			ExtRuleCoreByKv(vc.ExtFrom, doc, j, v, kvMap,e)
+			ExtRuleCoreByKv(vc.ExtFrom, doc, j, v, &kvMap, e)
 		} else if extByReg {
-			ExtRuleCoreByReg(vc.ExtFrom, doc, j, v, e)
+			ExtRuleCoreByReg(vc.ExtFrom, doc, j, v, e, isSite)
 		}
 	}
 	//如果只有一个分包,预算没有抽取到,把分包中的预算保存到外面
@@ -686,7 +734,7 @@ func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju
 						for _, vv := range v.KvTags[fieldname] {
 							text := ju.TrimLRSpace(vv.Value, "")
 							if text != "" {
-								j.Result[vc.Field] = append(j.Result[vc.Field], &ju.ExtField{
+								tmp := &ju.ExtField{
 									Field:       vc.Field,
 									Code:        "CL_分包",
 									Type:        tp,
@@ -694,7 +742,11 @@ func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju
 									RuleText:    bp.Text,
 									SourceValue: vv.Key,
 									Value:       text,
-								})
+								}
+								if isSite{
+									tmp.Score = 1
+								}
+								j.Result[vc.Field] = append(j.Result[vc.Field],tmp)
 							}
 						}
 					}
@@ -709,6 +761,9 @@ func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju
 			}
 			for _, tmp := range v {
 				field := &ju.ExtField{Weight: qu.IntAll(tmp["weight"]), Field: k, Code: qu.ObjToString(tmp["code"]), Type: qu.ObjToString(tmp["type"]), MatchType: qu.ObjToString(tmp["matchtype"]), RuleText: qu.ObjToString(tmp["ruletext"]), SourceValue: tmp["sourcevalue"], Value: tmp["value"]}
+				if isSite {
+					field.Score = 1
+				}
 				if tmp["blocktag"] != nil {
 					btag := make(map[string]string)
 					for k := range tmp["blocktag"].(map[string]bool) {
@@ -728,20 +783,20 @@ func ExtRuleCore(doc map[string]interface{}, e *ExtractTask, vc *RuleCore, j *ju
 }
 
 //抽取-规则-kv
-func ExtRuleCoreByKv(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, kvMap map[string][]map[string]interface{}, et *ExtractTask) {
+func ExtRuleCoreByKv(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, kvMap *map[string][]map[string]interface{}, et *ExtractTask) {
 	defer qu.Catch()
 	if extfrom == "title" || !in.IsLua {
 		return
 	}
 	lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
-	lua.KvMap = kvMap
+	lua.KvMap = *kvMap
 	lua.Block = j.Block
 	extinfo := lua.RunScript("core")
 	if tmps, ok := extinfo[in.Field].([]map[string]interface{}); ok {
 		for _, v := range tmps {
 			v["core"] = in.Code
 		}
-		kvMap[in.Field] = tmps
+		(*kvMap)[in.Field] = append((*kvMap)[in.Field], tmps...)
 	}
 	if len(extinfo) > 0 {
 		AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
@@ -749,7 +804,7 @@ func ExtRuleCoreByKv(extfrom string, doc map[string]interface{}, j *ju.Job, in *
 }
 
 //抽取-规则-正则
-func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) {
+func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask, isSite bool) {
 	defer qu.Catch()
 	//根据field配置项目,是否抽取。例如:废标、流标等跳过,
 	b := IsExtract(in.Field, j.Title, j.Content)
@@ -767,7 +822,7 @@ func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in
 	//块抽取
 	if in.Field != "" {
 		if extfrom == "title" {
-			extinfo := extRegCoreToResult(extfrom, qu.ObjToString(doc[extfrom]), &map[string]string{}, j, in)
+			extinfo := extRegCoreToResult(extfrom, qu.ObjToString(doc[extfrom]), &map[string]string{}, j, in, isSite)
 			if len(extinfo) > 0 {
 				AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
 			}
@@ -779,7 +834,7 @@ func ExtRuleCoreByReg(extfrom string, doc map[string]interface{}, j *ju.Job, in
 					btag[k] = TagConfigDesc[k]
 					blocktag.Unlock()
 				}
-				extinfo := extRegCoreToResult(extfrom, v.Text, &btag, j, in)
+				extinfo := extRegCoreToResult(extfrom, v.Text, &btag, j, in, isSite)
 				if len(extinfo) > 0 {
 					AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
 				}
@@ -897,89 +952,98 @@ func extractFromKv(field, fieldname string, blocks []*ju.Block, vc *RuleCore, kv
 }
 
 //正则提取结果
-func extRegCoreToResult(extfrom, text string, tag *map[string]string, j *ju.Job, v *RegLuaInfo) map[string][]map[string]interface{} {
+func extRegCoreToResult(extfrom, text string, tag *map[string]string, j *ju.Job, vre *RegLuaInfo,isSite bool) map[string][]map[string]interface{} {
 	defer qu.Catch()
+	var score int
+	if isSite{
+		score = 1
+	}
 	extinfo := map[string][]map[string]interface{}{}
-	if v.RegCore.Bextract { //正则是两部分的,可以直接抽取的(含下划线)
-		apos := v.RegCore.Reg.FindAllStringSubmatchIndex(text, -1)
-		if len(apos) > 0 {
-			pos := apos[0]
-			for k, p := range v.RegCore.ExtractPos {
-				if len(pos) > p {
-					if pos[p] == -1 || pos[p+1] == -1 {
-						continue
-					}
-					val := text[pos[p]:pos[p+1]]
-					sourcevalue := val
-					if val == "招标公告" {
-						return extinfo
-					}
-					if utf8.RuneCountInString(val) < 2 && extfrom == "title" {
-						val = text
+	rep := map[string]string{}
+	if vre.RegCore.Bextract { //正则是两部分的,可以直接抽取的(含下划线)
+		//处理正负数修正
+		ptmp := strings.Split(vre.RuleText, "#")
+		sign := 0
+		if len(ptmp) == 2 {
+			if ptmp[1] == "正" {
+				sign = 1
+			} else if ptmp[1] == "负" {
+				sign = -1
+			}
+		}
+		tmp := strings.Split(ptmp[0], "__")
+		if len(tmp) == 2 {
+			epos := strings.Split(tmp[1], ",")
+			posm := map[string]int{}
+			for _, v := range epos {
+				ks := strings.Split(v, ":")
+				if len(ks) == 2 { //(.*)招标公告(.*)__2:projectname,4:area
+					posm[ks[1]] = qu.IntAll(ks[0])
+				} else {
+					posm[vre.Field] = qu.IntAll(ks[0])
+				}
+			}
+			var pattern string
+			if strings.Contains(tmp[0], "\\u") {
+				tmp[0] = strings.Replace(tmp[0], "\\", "\\\\", -1)
+				tmp[0] = strings.Replace(tmp[0], "\\\\u", "\\u", -1)
+				pattern, _ = strconv.Unquote(`"` + tmp[0] + `"`)
+			} else {
+				pattern = tmp[0]
+			}
+			//log.Debug("pattern", pattern)
+			//fmt.Println(text)
+			reg := regexp.MustCompile(pattern)
+			apos := reg.FindAllStringSubmatchIndex(text, -1)
+			for i, _ := range apos {
+				pos := apos[i]
+				for k, p := range posm {
+					if len(pos) > p {
+						if pos[p] == -1 || pos[p+1] == -1 {
+							continue
+						}
+						val := text[pos[p]:pos[p+1]]
+						if string(val) == "" {
+							continue
+						}
+						if sign == -1 {
+							rep[k+"_"+fmt.Sprint(i)] = "-" + val
+						} else {
+							rep[k+"_"+fmt.Sprint(i)] = val
+						}
 					}
-					tmps := []map[string]interface{}{}
+				}
+			}
+			//fmt.Println(text)
+			tmps := []map[string]interface{}{}
+			for i := 0; i < len(apos); i++ {
+				if strings.TrimSpace(rep[vre.Field+"_"+fmt.Sprint(i)]) != "" {
 					tmp := map[string]interface{}{
-						"field":     v.Field,
-						"code":      v.Code,
-						"ruletext":  v.RuleText,
+						"field":     vre.Field,
+						"code":      vre.Code,
+						"ruletext":  vre.RuleText,
 						"extfrom":   text,
-						"value":     val,
+						"value":     rep[vre.Field+"_"+fmt.Sprint(i)],
 						"type":      "regexp",
 						"matchtype": "regcontent",
 						"blocktag":  *tag,
+						"score" :score,
 					}
 					tmps = append(tmps, tmp)
-					extinfo[k] = tmps
-					if strings.TrimSpace(val) != "" {
-						if v.RegCore.NumSign == -1 { //正负值修正
-							val = "-" + val
-						}
-						exfield := ju.ExtField{BlockTag: *tag, Field: k, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: sourcevalue, Value: val}
-						if tmp["blocktag"] != nil {
-							exfield.BlockTag = tmp["blocktag"].(map[string]string)
-						}
-						j.Result[k] = append(j.Result[k], &exfield)
-						//j.Result[k] = append(j.Result[k], &ju.ExtField{tmp["blocktag"].(map[string]bool), k, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val, 0})
+
+					exfield := ju.ExtField{BlockTag: *tag, Field: vre.Field, Code: vre.Code, RuleText: vre.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: rep[vre.Field+"_"+fmt.Sprint(i)], Value: rep[vre.Field+"_"+fmt.Sprint(i)]}
+					if tmp["blocktag"] != nil {
+						exfield.BlockTag = tmp["blocktag"].(map[string]string)
 					}
+					j.Result[vre.Field] = append(j.Result[vre.Field], &exfield)
 				}
 			}
-			if len(extinfo) == 0 {
-				regArr := strings.Split(v.RuleText, "__")
-				//fmt.Println(regArr[0])
-				if len(regArr) > 0 {
-					reg, err := regexp.Compile(regArr[0])
-					if err == nil {
-						datavals := reg.FindStringSubmatch(text)
-						tmps := []map[string]interface{}{}
-						for _, value := range datavals {
-							if value == "" {
-								continue
-							}
-							tmp := map[string]interface{}{
-								"field":     v.Field,
-								"code":      v.Code,
-								"ruletext":  regArr[0],
-								"extfrom":   text,
-								"value":     value,
-								"type":      "regexp",
-								"matchtype": "regcontent",
-								"blocktag":  *tag,
-							}
-							tmps = append(tmps, tmp)
-							extinfo[v.Field] = tmps
-							exfield := ju.ExtField{BlockTag: *tag, Field: v.Field, Code: v.Code + "去除__*后", RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: text, Value: value}
-							if tmp["blocktag"] != nil {
-								exfield.BlockTag = tmp["blocktag"].(map[string]string)
-							}
-							j.Result[v.Field] = append(j.Result[v.Field], &exfield)
-							//j.Result[k] = append(j.Result[k], &ju.ExtField{tmp["blocktag"].(map[string]bool), k, v.Code, v.RuleText, "regexp", "regcontent", extfrom, val, 0})
-						}
-					}
-				}
+			if len(tmps) > 0 {
+				extinfo[vre.Field] = tmps
 			}
 		}
 	} else {
-		pos := v.RegCore.Reg.FindStringIndex(text)
+		pos := vre.RegCore.Reg.FindStringIndex(text)
 		val := ""
 		if len(pos) == 2 {
 			text = text[pos[1]:]
@@ -992,25 +1056,26 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]string, j *ju.Job,
 		if val != "" {
 			tmps := []map[string]interface{}{}
 			tmp := map[string]interface{}{
-				"field":     v.Field,
-				"code":      v.Code,
-				"ruletext":  v.RuleText,
+				"field":     vre.Field,
+				"code":      vre.Code,
+				"ruletext":  vre.RuleText,
 				"extfrom":   text,
 				"value":     val,
 				"type":      "regexp",
 				"matchtype": "regcontent",
 				"blocktag":  *tag,
+				"score" :score,
 			}
 			tmps = append(tmps, tmp)
-			extinfo[v.Field] = tmps
-			if j.Result[v.Field] == nil {
-				j.Result[v.Field] = [](*ju.ExtField){}
+			extinfo[vre.Field] = tmps
+			if j.Result[vre.Field] == nil {
+				j.Result[vre.Field] = [](*ju.ExtField){}
 			}
-			field := &ju.ExtField{BlockTag: *tag, Field: v.Field, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: text, Value: val}
+			field := &ju.ExtField{BlockTag: *tag, Field: vre.Field, Code: vre.Code, RuleText: vre.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, SourceValue: text, Value: val}
 			if tmp["blocktag"] != nil {
 				field.BlockTag = tmp["blocktag"].(map[string]string)
 			}
-			j.Result[v.Field] = append(j.Result[v.Field], field)
+			j.Result[vre.Field] = append(j.Result[vre.Field], field)
 		}
 	}
 	return extinfo
@@ -1030,7 +1095,7 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 			if tmps, ok := v.([]map[string]interface{}); ok {
 				j.Result[k] = [](*ju.ExtField){}
 				for _, tmp := range tmps {
-					field := &ju.ExtField{Field: k, Code: qu.ObjToString(tmp["code"]), RuleText: qu.ObjToString(tmp["ruletext"]), Type: qu.ObjToString(tmp["type"]), MatchType: qu.ObjToString(tmp["matchtype"]), ExtFrom: qu.ObjToString(tmp["extfrom"]), Value: tmp["value"], Score: 0}
+					field := &ju.ExtField{Field: k, Code: qu.ObjToString(tmp["code"]), RuleText: qu.ObjToString(tmp["ruletext"]), Type: qu.ObjToString(tmp["type"]), MatchType: qu.ObjToString(tmp["matchtype"]), ExtFrom: qu.ObjToString(tmp["extfrom"]), Value: tmp["value"]}
 					if tmp["blocktag"] != nil {
 						field.BlockTag = tmp["blocktag"].(map[string]string)
 					}
@@ -1661,11 +1726,15 @@ func resetWinnerorder(j *ju.Job) {
 	if len(j.Winnerorder) == 0 {
 		return
 	}
+	maxlen := len(j.Winnerorder)-1
 	//中标单位
 	i := 0
 	winners := []*ju.ExtField{}
 	for _, v := range j.Result["winner"] {
 		if v.Code == "winnerorder" {
+			if maxlen < i {
+				continue
+			}
 			j.Winnerorder[i]["entname"] = v.Value
 			i++
 		} else {
@@ -1678,6 +1747,9 @@ func resetWinnerorder(j *ju.Job) {
 	bidamounts := []*ju.ExtField{}
 	for _, v := range j.Result["bidamount"] {
 		if v.Code == "winnerorder" {
+			if maxlen < i {
+				continue
+			}
 			j.Winnerorder[i]["price"] = v.Value
 			i++
 		} else {

+ 51 - 46
src/jy/extract/extractInit.go

@@ -132,6 +132,8 @@ type ExtractTask struct {
 	Trie_Sims           []*ju.Trie     //所有简称
 	Seg_PCD             *gse.Segmenter //分词
 	Seg_SV              *gse.Segmenter //分词
+	Luacodes            *sync.Map      //站点规则
+	SiteMerge           *sync.Map      //抽取合并
 }
 
 type SiteCity struct {
@@ -237,15 +239,18 @@ func (e *ExtractTask) InitTaskInfo() {
 	}
 }
 func (e *ExtractTask) InitSite() {
-	Luacodes = sync.Map{}
-	sites, _ := db.Mgo.Find("site_management", bson.M{"version": e.TaskInfo.Version}, nil, bson.M{"site_script": 1}, false, -1, -1)
+	e.Luacodes = &sync.Map{}
+	e.SiteMerge = &sync.Map{}
+	sites, _ := db.Mgo.Find("site_management", bson.M{"version": e.TaskInfo.Version}, nil, bson.M{"site_script": 1, "ismerge": 1}, false, -1, -1)
 	for _, v := range *sites {
 		if vv, ok := v["site_script"].([]interface{}); ok {
-			for _, vv := range vv {
-				Luacodes.Store(vv, map[string]interface{}{})
+			for _, vvv := range vv {
+				e.Luacodes.Store(vvv, map[string]interface{}{})
+				e.SiteMerge.Store(vvv, v["ismerge"] == true) // records without a bool ismerge count as false instead of panicking
 			}
 		} else if vv, ok := v["site_script"].(interface{}); ok {
-			Luacodes.Store(vv, map[string]interface{}{})
+			e.Luacodes.Store(vv, map[string]interface{}{})
+			e.SiteMerge.Store(vv, v["ismerge"] == true) // same tolerant read as above
 		}
 	}
 }
@@ -344,22 +349,22 @@ func (e *ExtractTask) InitRuleBacks(isSite bool) {
 		}
 		if isSite {
 			sm, _ := db.Mgo.FindById("site_management", qu.ObjToString(v["pid"]), bson.M{"site_script": 1})
-			if (*sm) == nil || len(*sm) <= 0{
+			if (*sm) == nil || len(*sm) <= 0 {
 				eSiteRuleBacks = []*RegLuaInfo{}
 				continue
 			}
 			for _, v2 := range (*sm)["site_script"].([]interface{}) {
-				if mdpvalue, ok := Luacodes.Load(v2); ok {
-					if mdpvalue.(map[string]interface{})["e.SiteRuleBacks"] == nil{
+				if mdpvalue, ok := e.Luacodes.Load(v2); ok {
+					if mdpvalue.(map[string]interface{})["e.SiteRuleBacks"] == nil {
 						mdpvalue.(map[string]interface{})["e.SiteRuleBacks"] = eSiteRuleBacks
-					}else {
-						if tmplist,ok3 :=mdpvalue.(map[string]interface{})["e.SiteRuleBacks"].([]*RegLuaInfo) ;ok3{
+					} else {
+						if tmplist, ok3 := mdpvalue.(map[string]interface{})["e.SiteRuleBacks"].([]*RegLuaInfo); ok3 {
 							tmplist = append(tmplist, eSiteRuleBacks...)
 							mdpvalue.(map[string]interface{})["e.SiteRuleBacks"] = tmplist
 						}
 						//mdpvalue.(map[string]interface{})["e.SiteRuleBacks"].([]*RegLuaInfo) = append(mdpvalue.(map[string]interface{})["e.SiteRuleBacks"].([]*RegLuaInfo), eSiteRuleBacks...)
 					}
-					Luacodes.Store(v2, mdpvalue)
+					e.Luacodes.Store(v2, mdpvalue)
 				}
 			}
 			eSiteRuleBacks = []*RegLuaInfo{}
@@ -556,7 +561,7 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
 			if v["subclass"] == nil {
 				eSiteRuleCores[topclass] = make(map[string][]*RuleCore)
 				for attr, _ := range v["fields"].(map[string]interface{}) {
-					if fieldrules[attr]!=nil{
+					if fieldrules[attr] != nil {
 						eSiteRuleCores[topclass][attr] = fieldrules[attr]
 					}
 				}
@@ -564,7 +569,7 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
 				for ca, fs := range v["subclass"].(map[string]interface{}) {
 					eSiteRuleCores[topclass+"_"+ca] = make(map[string][]*RuleCore)
 					for field, _ := range fs.(map[string]interface{}) {
-						if fieldrules[field] !=nil{
+						if fieldrules[field] != nil {
 							eSiteRuleCores[topclass+"_"+ca][field] = fieldrules[field]
 						}
 					}
@@ -573,26 +578,26 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
 		}
 		if isSite {
 			sm, _ := db.Mgo.FindById("site_management", qu.ObjToString(vinfo["pid"]), bson.M{"site_script": 1})
-			if (*sm) == nil || len(*sm) <=0{
+			if (*sm) == nil || len(*sm) <= 0 {
 				eSiteRuleCores = make(map[string]map[string][]*RuleCore)
 				fieldrules = map[string][]*RuleCore{}
 				continue
 			}
 			for _, v2 := range (*sm)["site_script"].([]interface{}) {
-				if mdpvalue, ok := Luacodes.Load(v2); ok {
+				if mdpvalue, ok := e.Luacodes.Load(v2); ok {
 					//属性配置
-					if mdpvalue.(map[string]interface{})["e.SiteRuleCores"] == nil{
+					if mdpvalue.(map[string]interface{})["e.SiteRuleCores"] == nil {
 						mdpvalue.(map[string]interface{})["e.SiteRuleCores"] = eSiteRuleCores
-					}else {
-						for k2,v2 :=range eSiteRuleCores{
+					} else {
+						for k2, v2 := range eSiteRuleCores {
 							tmpv := mdpvalue.(map[string]interface{})["e.SiteRuleCores"].(map[string]map[string][]*RuleCore)[k2]
-							for kkkk,vvv:=range v2{
-								tmpv[kkkk]=vvv
+							for kkkk, vvv := range v2 {
+								tmpv[kkkk] = vvv
 							}
-							mdpvalue.(map[string]interface{})["e.SiteRuleCores"].(map[string]map[string][]*RuleCore )[k2] = tmpv
+							mdpvalue.(map[string]interface{})["e.SiteRuleCores"].(map[string]map[string][]*RuleCore)[k2] = tmpv
 						}
 					}
-					Luacodes.Store(v2, mdpvalue)
+					e.Luacodes.Store(v2, mdpvalue)
 				}
 			}
 			eSiteRuleCores = make(map[string]map[string][]*RuleCore)
@@ -607,7 +612,7 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
 			if v["subclass"] == nil {
 				e.RuleCores[topclass] = make(map[string][]*RuleCore)
 				for attr, _ := range v["fields"].(map[string]interface{}) {
-					if fieldrules[attr]!= nil{
+					if fieldrules[attr] != nil {
 						e.RuleCores[topclass][attr] = fieldrules[attr]
 					}
 				}
@@ -615,7 +620,7 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
 				for ca, fs := range v["subclass"].(map[string]interface{}) {
 					e.RuleCores[topclass+"_"+ca] = make(map[string][]*RuleCore)
 					for field, _ := range fs.(map[string]interface{}) {
-						if fieldrules[field]!= nil {
+						if fieldrules[field] != nil {
 							e.RuleCores[topclass+"_"+ca][field] = fieldrules[field]
 						}
 					}
@@ -722,26 +727,26 @@ func (e *ExtractTask) InitTag(isSite bool) {
 			//ju.TagdbTable[fname] = &tab
 			if isSite {
 				sm, _ := db.Mgo.FindById("site_management", qu.ObjToString(v["pid"]), bson.M{"site_script": 1})
-				if (*sm) == nil||len(*sm) <= 0{
+				if (*sm) == nil || len(*sm) <= 0 {
 					eSiteTag = map[string][]*Tag{}
 					continue
 				}
 				for _, v2 := range (*sm)["site_script"].([]interface{}) {
-					if  v2 == nil || v2 == "" {
+					if v2 == nil || v2 == "" {
 						continue
 					}
-					if mdpvalue, ok := Luacodes.Load(v2); ok {
-						if mdpvalue.(map[string]interface{})["e.SiteTag"] == nil{
+					if mdpvalue, ok := e.Luacodes.Load(v2); ok {
+						if mdpvalue.(map[string]interface{})["e.SiteTag"] == nil {
 							mdpvalue.(map[string]interface{})["e.SiteTag"] = eSiteTag
-						}else {
-							for k2,v2 := range eSiteTag{
+						} else {
+							for k2, v2 := range eSiteTag {
 								mdpvalue.(map[string]interface{})["e.SiteTag"].(map[string][]*Tag)[k2] = v2
 							}
 						}
-						Luacodes.Store(v2, mdpvalue)
+						e.Luacodes.Store(v2, mdpvalue)
 					}
-					tmpMap.Store(fname,&tab)
-					ju.SiteTagdbTable.Store(v2,tmpMap)
+					tmpMap.Store(fname, &tab)
+					ju.SiteTagdbTable.Store(v2, tmpMap)
 				}
 				//ju.SiteTagdbTable.Store(fname, &tab)
 				eSiteTag = map[string][]*Tag{}
@@ -794,20 +799,20 @@ func (e *ExtractTask) InitTag(isSite bool) {
 		}
 		if isSite {
 			sm, _ := db.Mgo.FindById("site_management", qu.ObjToString(v["pid"]), bson.M{"site_script": 1})
-			if (*sm) == nil || len(*sm)<=0{
+			if (*sm) == nil || len(*sm) <= 0 {
 				eSiteTag = map[string][]*Tag{}
 				continue
 			}
 			for _, v2 := range (*sm)["site_script"].([]interface{}) {
-				if mdpvalue, ok := Luacodes.Load(v2); ok {
-					if mdpvalue.(map[string]interface{})["e.SiteTag"] == nil{
+				if mdpvalue, ok := e.Luacodes.Load(v2); ok {
+					if mdpvalue.(map[string]interface{})["e.SiteTag"] == nil {
 						mdpvalue.(map[string]interface{})["e.SiteTag"] = eSiteTag
-					}else {
-						for k2,v2 := range eSiteTag{
+					} else {
+						for k2, v2 := range eSiteTag {
 							mdpvalue.(map[string]interface{})["e.SiteTag"].(map[string][]*Tag)[k2] = v2
 						}
 					}
-					Luacodes.Store(v2, mdpvalue)
+					e.Luacodes.Store(v2, mdpvalue)
 				}
 			}
 			eSiteTag = map[string][]*Tag{}
@@ -848,26 +853,26 @@ func (e *ExtractTask) InitClearFn(isSite bool) {
 		}
 		if isSite {
 			sm, _ := db.Mgo.FindById("site_management", qu.ObjToString(tmp["pid"]), bson.M{"site_script": 1})
-			if (*sm) == nil|| len(*sm) <=0{
+			if (*sm) == nil || len(*sm) <= 0 {
 				fn = map[string][]string{}
 				continue
 			}
 			for _, v2 := range (*sm)["site_script"].([]interface{}) {
-				if mdpvalue, ok := Luacodes.Load(v2); ok {
-					if mdpvalue.(map[string]interface{})["e.SiteClearFn"]== nil{
+				if mdpvalue, ok := e.Luacodes.Load(v2); ok {
+					if mdpvalue.(map[string]interface{})["e.SiteClearFn"] == nil {
 						mdpvalue.(map[string]interface{})["e.SiteClearFn"] = fn
-					}else {
-						for k2,v2 := range fn{
+					} else {
+						for k2, v2 := range fn {
 							mdpvalue.(map[string]interface{})["e.SiteClearFn"].(map[string][]string)[k2] = v2
 						}
 					}
-					Luacodes.Store(v2, mdpvalue)
+					e.Luacodes.Store(v2, mdpvalue)
 				}
 			}
 			fn = map[string][]string{}
 		}
 	}
-	if !isSite{
+	if !isSite {
 		e.ClearFn = fn
 	}
 }

+ 11 - 14
src/jy/extract/extractudp.go

@@ -158,16 +158,15 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				}
 				var j, jf *ju.Job
 				var isSite bool
-				var codeSite string
 				if ext.IsFileField && v["projectinfo"] != nil {
 					v["isextFile"] = true
-					j, jf,isSite,codeSite = ext.PreInfo(v)
+					j, jf,isSite = ext.PreInfo(v)
 				} else {
-					j, _,isSite,codeSite = ext.PreInfo(v)
+					j, _,isSite = ext.PreInfo(v)
 				}
-				ext.TaskInfo.ProcessPool <- true
-				go ext.ExtractProcess(j, jf,isSite,codeSite)
+				go ext.ExtractProcess(j, jf,isSite)
 				index++
+				ext.TaskInfo.ProcessPool <- true
 			}
 			list2, _ := ext.TaskInfo.FDB.Find(ext.TaskInfo.FromColl+"_back", query, nil, Fields, false, -1, -1)
 			for _, v := range *list2 {
@@ -176,16 +175,15 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				}
 				var j, jf *ju.Job
 				var isSite bool
-				var codeSite string
 				if ext.IsFileField && v["projectinfo"] != nil {
 					v["isextFile"] = true
-					j, jf,isSite,codeSite = ext.PreInfo(v)
+					j, jf,isSite = ext.PreInfo(v)
 				} else {
-					j, _,isSite,codeSite = ext.PreInfo(v)
+					j, _,isSite = ext.PreInfo(v)
 				}
-				ext.TaskInfo.ProcessPool <- true
-				go ext.ExtractProcess(j, jf,isSite,codeSite)
+				go ext.ExtractProcess(j, jf,isSite)
 				index++
+				ext.TaskInfo.ProcessPool <- true
 			}
 			db.Mgo.UpdateById("esctask", (*tsk)["_id"], map[string]interface{}{
 				"$set": map[string]interface{}{
@@ -225,19 +223,18 @@ func ExtractByUdp(sid, eid string, ra *net.UDPAddr, instanceId ...string) {
 				_id := qu.BsonIdToSId(v["_id"])
 				var j, jf *ju.Job
 				var isSite bool
-				var codeSite string
 				if ext.IsFileField && v["projectinfo"] != nil {
 					v["isextFile"] = true
-					j, jf,isSite,codeSite = ext.PreInfo(v)
+					j, jf,isSite = ext.PreInfo(v)
 				} else {
-					j, _,isSite,codeSite = ext.PreInfo(v)
+					j, _,isSite = ext.PreInfo(v)
 				}
 				ext.TaskInfo.ProcessPool <- true
 				wg.Add(1)
 				go func(wg *sync.WaitGroup, j, jf *ju.Job) {
 					defer wg.Done()
 					//log.Debug(index,j.SourceMid,)
-					ext.ExtractProcess(j, jf,isSite,codeSite)
+					ext.ExtractProcess(j, jf,isSite)
 				}(&wg, j, jf)
 				index++
 				if index%1000 == 0 {

+ 1 - 1
src/jy/extract/score_jsondata.go

@@ -12,7 +12,7 @@ import (
 )
 
 func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.ExtField {
-	if len((j.Result)) <= 0 {
+	if len(j.Result) <= 0 {
 		return j.Result
 	}
 	tmps := make(map[string][]*util.ExtField)

+ 1 - 1
src/main_blocktest.go

@@ -121,7 +121,7 @@ func com(doc map[string]interface{}) {
 	}
 	e.TaskInfo.ProcessPool <- true
 	pretreated.AnalyStart(j,false,"")
-	e.ExtractProcess(j, nil,false,"")
+	e.ExtractProcess(j, nil,false)
 
 	log.Println("=============块信息================")
 	for _, v := range j.Block {

+ 50 - 2
src/web/templates/admin/site_management.html

@@ -39,6 +39,7 @@
                                 <th>网址</th>
                                 <th>脚本集</th>
                                 <th>站点状态</th>
+                                <th>抽取合并</th>
                                 <th>站点规则</th>
                                 <th>站点操作</th>
                             </tr>
@@ -100,6 +101,13 @@
                             <input id="isuse" name="isuse" type="radio" value="false"><label for="isuse">停用</label>
                         </div>
                     </div>
+                    <!-- Extraction-merge toggle. Each radio has its own id and each label targets
+                         its own radio; the script reads the value via input[name='ismerge']. -->
+                    <div class="form-group ckbox">
+                        <label for="code" id="fieldname" class="col-sm-2 control-label myred">抽取合并:</label>
+                        <div class="col-sm-10">
+                            <input id="ismerge" name="ismerge" type="radio" checked="checked" value="true"><label  for="ismerge">启用</label>
+                            <input id="ismerge_off" name="ismerge" type="radio" value="false"><label for="ismerge_off">停用</label>
+                        </div>
+                    </div>
                 </div>
                 <div class="modal-footer">
                     <button type="button" class="btn btn-default cancel" data-dismiss="modal" onclick="reset()">取消
@@ -156,6 +164,17 @@
                         return tmp
                     }
                 },
+                {
+                    "data": "ismerge", "width": "7%", render: function (val, a, row) {
+                        tmp = ""
+                        if (val) {
+                            tmp = "<a  title='停用' onclick='usemerge(\"" + row._id + "\"," + false + ")'><i class='fa fa-fw fa-circle text-green'></i></a>已启用"
+                        } else {
+                            tmp = "<a  title='启用' onclick='usemerge(\"" + row._id + "\"," + true + ")'><i class='fa fa-fw fa-circle text-red'></i></a>未启用"
+                        }
+                        return tmp
+                    }
+                },
                 {
                     "data": "_id", "width": "23%", render: function (val, a, row) {
                         tmp = '<div class="btn-group">' +
@@ -203,6 +222,7 @@
         var site_href = $("#site_href").val()//站点网址
         var site_script = $("#site_script").val()//站点脚本
         var isuse = $("input[name='isuse']:checked").val();
+        var ismerge = $("input[name='ismerge']:checked").val();
         if (site_name == "" || site_href == "" || site_script == "") {
             alert("站点填写不完整!");
             return false;
@@ -229,7 +249,8 @@
                 "site_href": site_href,
                 "site_script": site_script,
                 "vid": vid,
-                "isuse": isuse
+                "isuse": isuse,
+                "ismerge":ismerge
             },
             success: function (r) {
                 if (r.rep) {
@@ -265,7 +286,30 @@
             })
         });
     }
-
+    //usemerge
+    function usemerge(_id, ismerge) {
+        smg = ""
+        if (ismerge) {
+            smg = "确定启用?"
+        } else {
+            smg = "确定停用?"
+        }
+        showConfirm(smg, function () {
+            $.ajax({
+                url: "/admin/site_management/usemerge",
+                type: "post",
+                data: {"_id": _id, "ismerge": ismerge},
+                success: function (r) {
+                    if (r.rep) {
+                        window.location.href = "/admin/site_management?vid=" + vid
+                    } else {
+                        showTip("启用失败", 1000, function () {
+                        });
+                    }
+                }
+            })
+        });
+    }
     //修改
     function edit_table_btn(obj) {
         if (obj == "") {
@@ -291,6 +335,10 @@
                         $("input[name='isuse'][value='true']").attr("checked", false);
                         $("input[name='isuse'][value='false']").attr("checked", true);
                     }
+                    if (!r.data.ismerge) {
+                        $("input[name='ismerge'][value='true']").attr("checked", false);
+                        $("input[name='ismerge'][value='false']").attr("checked", true);
+                    }
                     $("#modal-info-version").modal("show");
                 } else {
                     showTip("查询错误!", 1200)