浏览代码

附件抽取

fengweiqiang 6 年之前
父节点
当前提交
7a26998737
共有 5 个文件被更改，包括 251 次插入和 190 次删除
  1. 7 6
      src/jy/extract/exportask.go
  2. 208 166
      src/jy/extract/extract.go
  3. 21 0
      src/jy/extract/extractInit.go
  4. 14 18
      src/jy/extract/extractudp.go
  5. 1 0
      src/jy/util/article.go

+ 7 - 6
src/jy/extract/exportask.go

@@ -59,6 +59,9 @@ func extractAndExport(v string, t map[string]interface{}) {
 	//品牌抽取是否开启
 	ju.IsBrandGoods = ju.Config["brandgoods"].(bool)
 
+	//附件抽取是否开启
+	e.InitFile()
+
 	query := t["query"]
 	limit := qu.IntAll(t["limit"])
 	list, _ := e.TaskInfo.FDB.Find(e.TaskInfo.FromColl, query, nil, Fields, false, 0, limit)
@@ -67,12 +70,10 @@ func extractAndExport(v string, t map[string]interface{}) {
 			continue
 		}
 		var j, jf *ju.Job
-		if e.IsFileField{
-			if v["projectinfo"] != nil {
-				v["isextFile"] = true
-				j, jf = PreInfo(v)
-			}
-		}else {
+		if e.IsFileField && v["projectinfo"] != nil {
+			v["isextFile"] = true
+			j, jf = PreInfo(v)
+		} else {
 			j, _ = PreInfo(v)
 		}
 		e.TaskInfo.ProcessPool <- true

+ 208 - 166
src/jy/extract/extract.go

@@ -10,7 +10,7 @@ import (
 	ju "jy/util"
 	"log"
 	qu "qfw/util"
-	redis "qfw/util/redis"
+	"qfw/util/redis"
 	"reflect"
 	"regexp"
 	"strconv"
@@ -22,13 +22,13 @@ import (
 )
 
 var (
-	lock          sync.RWMutex
-	cut           = ju.NewCut()                          //获取正文并清理
-	ExtLogs       map[*TaskInfo][]map[string]interface{} //抽取日志
-	TaskList      map[string]*ExtractTask                //任务列表
-	ClearTaskList map[string]*ClearTask                  //清理任务列表
-	saveLimit     = 200                                  //抽取日志批量保存
-	PageSize      = 5000                                 //查询分页
+	lock    sync.RWMutex
+	cut     = ju.NewCut()                          //获取正文并清理
+	ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
+	TaskList      map[string]*ExtractTask          //任务列表
+	ClearTaskList map[string]*ClearTask            //清理任务列表
+	saveLimit     = 200                            //抽取日志批量保存
+	PageSize      = 5000                           //查询分页
 	Fields        = `{"title":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -59,7 +59,8 @@ func StartExtractTestTask(taskId, startId, num, resultcoll, trackcoll string) bo
 
 	//品牌抽取是否开启
 	ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)
-
+	//附件抽取是否开启
+	ext.InitFile()
 	return RunExtractTestTask(ext, startId, num)
 }
 
@@ -80,12 +81,10 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 				continue
 			}
 			var j, jf *ju.Job
-			if ext.IsFileField{
-				if v["projectinfo"] != nil {
+			if ext.IsFileField &&v["projectinfo"] != nil {
 					v["isextFile"] = true
 					j, jf = PreInfo(v)
-				}
-			}else {
+			} else {
 				j, _ = PreInfo(v)
 			}
 			ext.TaskInfo.ProcessPool <- true
@@ -131,6 +130,8 @@ func StartExtractTaskId(taskId string) bool {
 
 	//品牌抽取是否开启
 	ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)
+	//附件抽取是否开启
+	ext.InitFile()
 
 	ext.IsRun = true
 	go ext.ResultSave()
@@ -181,12 +182,10 @@ func RunExtractTask(taskId string) {
 				break
 			}
 			var j, jf *ju.Job
-			if ext.IsFileField{
-				if v["projectinfo"] != nil {
-					v["isextFile"] = true
-					j, jf = PreInfo(v)
-				}
-			}else {
+			if ext.IsFileField && v["projectinfo"] != nil {
+				v["isextFile"] = true
+				j, jf = PreInfo(v)
+			} else {
 				j, _ = PreInfo(v)
 			}
 			ext.TaskInfo.ProcessPool <- true
@@ -207,7 +206,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 	defer qu.Catch()
 	//判断是否有附件这个字段
 	var isextFile bool
-	if doc["isextFile"] != nil{
+	if doc["isextFile"] != nil {
 		isextFile = doc["isextFile"].(bool)
 	}
 	detail := ""
@@ -223,7 +222,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 	doc["detail"] = detail
 
 	if isextFile {
-		file2text(&doc)  //附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
+		file2text(&doc) //附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
 	}
 	toptype := qu.ObjToString(doc["toptype"])
 	if qu.ObjToString(doc["type"]) == "bid" {
@@ -258,6 +257,7 @@ func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 			Province:   qu.ObjToString(doc["area"]),
 			Result:     map[string][]*ju.ExtField{},
 			BuyerAddr:  qu.ObjToString(doc["buyeraddr"]),
+			IsFile:     isextFile,
 		}
 	}
 	qu.Try(func() {
@@ -298,26 +298,28 @@ func file2text(doc *map[string]interface{}) {
 			}
 		}
 	}
-	if utf8.RuneCountInString(strfileinfo.String()) < qu.IntAllDef(ju.Config["filelength"],100000 ){
+	if utf8.RuneCountInString(strfileinfo.String()) < qu.IntAllDef(ju.Config["filelength"], 100000) {
 		(*doc)["detailfile"] = strfileinfo.String() //附件文本堆一起(后期可以考虑,分开处理)
 	}
 }
 
 //抽取
 func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
+	e.ExtractDetail(j)
+	if jf !=nil &&jf.IsFile {
+		e.ExtractFile(jf)
+	}
+	//分析抽取结果并保存 todo
+	AnalysisSaveResult(j, jf, e)
+	<-e.TaskInfo.ProcessPool
+}
+
+func (e *ExtractTask) ExtractDetail(j *ju.Job) {
 	qu.Try(func() {
 		doc := *j.Data
-		docfile := make(map[string]interface{})
-		if jf != nil{
-			docfile = *jf.Data
-			docfile["dockey"]= "detailfile"
-		}
 		//全局前置规则,结果覆盖doc属性
 		for _, v := range e.RulePres {
 			doc = ExtRegPre(doc, j, v, e.TaskInfo)
-			if jf != nil{
-				docfile = ExtRegPre(docfile, jf, v, e.TaskInfo)
-			}
 		}
 		//抽取规则
 		for _, vc := range e.RuleCores {
@@ -351,46 +353,106 @@ func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
 			}
 			//log.Println("抽取-后置规则", tmp)
 		}
-		//抽取规则-附件
-		if jf != nil{
-			for _, vc := range e.RuleCores {
-				tmp := ju.DeepCopy(docfile).(map[string]interface{})
-				//是否进入逻辑
-				if !ju.Logic(vc.LuaLogic, tmp) {
-					continue
-				}
-				//抽取-前置规则
-				for _, v := range vc.RulePres {
-					tmp = ExtRegPre(tmp, jf, v, e.TaskInfo)
-				}
-				//log.Println("抽取-前置规则", tmp)
 
-				//抽取-规则
-				for _, v := range vc.RuleCores {
-					ExtRegCore(vc.ExtFrom, tmp, jf, v, e)
+		//全局后置规则
+		for _, v := range e.RuleBacks {
+			ExtRegBack(j, v, e.TaskInfo)
+		}
+		//候选人加入
+		if len(j.Winnerorder) > 0 {
+			winner := &ju.ExtField{
+				Field:     "winner",
+				Code:      "",
+				RuleText:  "",
+				Type:      "winnerorder",
+				MatchType: "winnerorder",
+				ExtFrom:   "",
+				Value:     j.Winnerorder[0]["entname"],
+				Score:     0,
+			}
+			if len([]rune(qu.ObjToString(j.Winnerorder[0]["entname"]))) < 4 {
+				winner.Score = -5
+			}
+			winners := j.Result["winner"]
+			if winners != nil {
+				winners = append(winners, winner)
+			} else {
+				winners = []*ju.ExtField{}
+				winners = append(winners, winner)
+			}
+			j.Result["winner"] = winners
+		}
+		//函数清理
+		for key, val := range j.Result {
+			for _, v := range val {
+				lock.Lock()
+				cfn := e.ClearFn[key]
+				lock.Unlock()
+				data := clear.DoClearFn(cfn, []interface{}{v.Value, j.Content})
+				v.Value = data[0]
+				//清理特殊符号
+				lock.Lock()
+				if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
+					clear.MesField[key] != nil {
+					text := qu.ObjToString(v.Value)
+					text = clear.OtherClean(key, text)
+					v.Value = text
 				}
-				//log.Println("抽取-规则", tmp)
+				lock.Unlock()
+			}
+		}
+		PackageDetail(j, e) //处理分包信息
+		//		bs, _ := json.Marshal(j.Result)
+		//		log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
+	}, func(err interface{}) {
+		log.Println("ExtractProcess err", err)
+	})
+}
+func (e *ExtractTask) ExtractFile(j *ju.Job) {
+	qu.Try(func() {
+		doc := *j.Data
+		//全局前置规则,结果覆盖doc属性
+		for _, v := range e.RulePres {
+			if e.FileFields[v.Field] > 0 {
+				doc = ExtRegPre(doc, j, v, e.TaskInfo)
+			}
+		}
+		//抽取规则
+		for _, vc := range e.RuleCores {
+			tmp := ju.DeepCopy(doc).(map[string]interface{})
+			//是否进入逻辑
+			if !ju.Logic(vc.LuaLogic, tmp) {
+				continue
+			}
+			//抽取-前置规则
+			for _, v := range vc.RulePres {
+				if e.FileFields[vc.Field] > 0 {
+					tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
+				}
+			}
+			//log.Println("抽取-前置规则", tmp)
 
-				//项目名称未能抽取到,标题来凑
-				if vc.Field == "projectname" {
-					if len(jf.Result[vc.Field]) < 1 {
-						jf.Result[vc.Field] = append(jf.Result[vc.Field], &ju.ExtField{vc.Field, "title", "title", "regexp", "title", vc.ExtFrom, jf.Title, 0})
-					}
+			//抽取-规则
+			for _, v := range vc.RuleCores {
+				if e.FileFields[vc.Field] > 0 {
+					ExtRegCore(vc.ExtFrom, tmp, j, v, e)
 				}
+			}
+			//log.Println("抽取-规则", tmp)
 
-				//抽取-后置规则
-				for _, v := range vc.RuleBacks {
-					ExtRegBack(jf, v, e.TaskInfo)
+			//抽取-后置规则
+			for _, v := range vc.RuleBacks {
+				if e.FileFields[vc.Field] > 0 {
+					ExtRegBack(j, v, e.TaskInfo)
 				}
-				//log.Println("抽取-后置规则", tmp)
 			}
+			//log.Println("抽取-后置规则", tmp)
 		}
 
 		//全局后置规则
 		for _, v := range e.RuleBacks {
-			ExtRegBack(j, v, e.TaskInfo)
-			if jf != nil {
-				ExtRegBack(jf, v, e.TaskInfo)
+			if e.FileFields[v.Field] > 0 {
+				ExtRegBack(j, v, e.TaskInfo)
 			}
 		}
 		//候选人加入
@@ -417,32 +479,6 @@ func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
 			}
 			j.Result["winner"] = winners
 		}
-		//候选人加入-附件
-		if jf != nil{
-			if len(jf.Winnerorder) > 0 {
-				winner := &ju.ExtField{
-					Field:     "winner",
-					Code:      "",
-					RuleText:  "",
-					Type:      "winnerorder",
-					MatchType: "winnerorder",
-					ExtFrom:   "",
-					Value:     jf.Winnerorder[0]["entname"],
-					Score:     0,
-				}
-				if len([]rune(qu.ObjToString(jf.Winnerorder[0]["entname"]))) < 4 {
-					winner.Score = -5
-				}
-				winners := jf.Result["winner"]
-				if winners != nil {
-					winners = append(winners, winner)
-				} else {
-					winners = []*ju.ExtField{}
-					winners = append(winners, winner)
-				}
-				jf.Result["winner"] = winners
-			}
-		}
 		//函数清理
 		for key, val := range j.Result {
 			for _, v := range val {
@@ -462,42 +498,13 @@ func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
 				lock.Unlock()
 			}
 		}
-		//函数清理-附件
-		if jf != nil{
-			for key, val := range jf.Result {
-				for _, v := range val {
-					lock.Lock()
-					cfn := e.ClearFn[key]
-					lock.Unlock()
-					data := clear.DoClearFn(cfn, []interface{}{v.Value, jf.Content})
-					v.Value = data[0]
-					//清理特殊符号
-					lock.Lock()
-					if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
-						clear.MesField[key] != nil {
-						text := qu.ObjToString(v.Value)
-						text = clear.OtherClean(key, text)
-						v.Value = text
-					}
-					lock.Unlock()
-				}
-			}
-		}
+
 		PackageDetail(j, e) //处理分包信息
-		if jf != nil{
-			PackageDetail(jf, e) //处理分包信息-附件
-		}
 		//		bs, _ := json.Marshal(j.Result)
 		//		log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
-		//分析抽取结果并保存 todo
-		AnalysisSaveResult(j, e)
-		if jf != nil{
-			AnalysisSaveResult(jf, e) //分析抽取结果并保存-附件
-		}
 	}, func(err interface{}) {
 		log.Println("ExtractProcess err", err)
 	})
-	<-e.TaskInfo.ProcessPool
 }
 
 //前置过滤
@@ -517,9 +524,9 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
 		AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志
 	} else {
 		var key string
-		if doc["dockey"]== nil{
+		if !j.IsFile {
 			key = qu.If(in.Field == "", "detail", in.Field).(string)
-		}else {
+		} else {
 			key = qu.If(in.Field == "", "detailfile", in.Field).(string)
 		}
 		text := qu.ObjToString(doc[key])
@@ -967,54 +974,9 @@ type FieldValue struct {
 }
 
 //分析抽取结果并保存
-func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
+func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 	qu.Try(func() {
-		doc := j.Data
-		result := j.Result
-		_id := qu.BsonIdToSId((*doc)["_id"])
-		iscore, _ := ju.Config["fieldscore"].(bool)
-		if iscore { //打分
-			result = ScoreFields(j)
-		}
-		//结果排序
-		values := map[string][]*ju.SortObject{}
-		for key, val := range result {
-			fieldValue := map[string][]interface{}{}
-			if iscore { //走打分
-				for _, v := range val {
-					if len(fmt.Sprint(v.Value)) < 1 {
-						continue //去除空串
-					}
-					fieldValue[fmt.Sprint(v.Value)+v.Type] = []interface{}{v.Score, v.Value}
-				}
-			} else { //不走打分,按出现频次
-				for _, v := range val {
-					if len(fmt.Sprint(v.Value)) < 1 {
-						continue //去除空串
-					}
-					if fieldValue[fmt.Sprint(v.Value)] == nil {
-						fieldValue[fmt.Sprint(v.Value)] = []interface{}{0, v.Value}
-					} else {
-						fieldValue[fmt.Sprint(v.Value)][0] = qu.IntAll(fieldValue[fmt.Sprint(v.Value)][0]) + 1
-					}
-				}
-			}
-			objects := []*ju.SortObject{}
-			for k, v := range fieldValue {
-				ValueStr := "" //第二排序
-				if reflect.TypeOf(v[1]).String() == "string" {
-					ValueStr = qu.ObjToString(v[1])
-				}
-				tmp := &ju.SortObject{
-					Key:      k,
-					Value:    qu.IntAll(v[0]),
-					Object:   v[1],
-					ValueStr: ValueStr,
-				}
-				objects = append(objects, tmp)
-			}
-			values[key] = ju.ExtSort(objects)
-		}
+		doc, result, _id, values := funcAnalysis(j)
 		//从排序结果中取值
 		tmp := map[string]interface{}{} //抽取值
 		for key, val := range values {
@@ -1031,9 +993,31 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
 		if len(j.Winnerorder) > 0 { //候选人信息
 			tmp["winnerorder"] = j.Winnerorder
 		}
+		//处理附件
+		var resultf map[string][]*ju.ExtField
+		var filevalues map[string][]*ju.SortObject
+		if jf != nil {
+			_, resultf, _, filevalues = funcAnalysis(jf)
+			ffield := map[string]interface{}{}
+			for key, val := range filevalues {
+				for _, v := range val { //取第一个非负数
+					if v.Key != "" && v.Value > -1 {
+						ffield[key] = v.Object
+						break
+					}
+				}
+			}
+			if len(jf.PackageInfo) > 0 { //分包信息
+				ffield["package"] = jf.PackageInfo
+			}
+			if len(jf.Winnerorder) > 0 { //候选人信息
+				ffield["winnerorder"] = jf.Winnerorder
+			}
+			tmp["ffield"] = ffield
+		}
 		for k, v := range *doc {
 			//去重冗余字段
-			if k == "detail" || k == "contenthtml" || k == "site" || k == "spidercode" {
+			if delFiled(k) {
 				continue
 			}
 			if tmp[k] == nil {
@@ -1086,6 +1070,7 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
 			if b, ok := ju.Config["saveresult"].(bool); ok && b {
 				id := tmp["_id"]
 				tmp["result"] = result
+				tmp["resultf"] = resultf
 				delete(tmp, "_id")
 				tmparr := []map[string]interface{}{
 					map[string]interface{}{
@@ -1102,6 +1087,7 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
 				tmp["epackage"] = string(bs)
 			}
 			tmp["result"] = result
+			tmp["resultf"] = resultf
 			b := db.Mgo.Update(e.TaskInfo.TestColl, `{"_id":"`+_id+`"}`, map[string]interface{}{"$set": tmp}, true, false)
 			if !b {
 				log.Println(e.TaskInfo.TestColl, _id)
@@ -1112,6 +1098,62 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
 	})
 }
 
+// delFiled reports whether k is one of the redundant source-document keys.
+// AnalysisSaveResult calls it while copying the document into the saved
+// result and skips every key for which it returns true.
+func delFiled(k string) bool {
+	switch k {
+	case "detail", "contenthtml", "site", "spidercode", "projectinfo":
+		return true
+	default:
+		return false
+	}
+}
+
+// funcAnalysis ranks the candidate values extracted for each field of job j.
+//
+// It returns, in order: j.Data; the per-field results (the output of
+// ScoreFields(j) when ju.Config["fieldscore"] is true, otherwise j.Result);
+// the document "_id" converted with qu.BsonIdToSId; and, for every field, its
+// candidates as ordered by ju.ExtSort.
+//
+// NOTE(review): the results are unnamed and the only panic handling is the
+// deferred qu.Catch, so a recovered panic presumably yields zero values,
+// including a nil document pointer — confirm that callers tolerate this.
+func funcAnalysis(j *ju.Job) (*map[string]interface{}, map[string][]*ju.ExtField, string, map[string][]*ju.SortObject) {
+	defer qu.Catch()
+	doc := j.Data
+	result := j.Result
+	_id := qu.BsonIdToSId((*doc)["_id"])
+	iscore, _ := ju.Config["fieldscore"].(bool)
+	if iscore { // scoring enabled
+		result = ScoreFields(j)
+	}
+	// Rank the candidates of every field.
+	values := map[string][]*ju.SortObject{}
+	for key, val := range result {
+		// Candidate key -> [weight, value].
+		fieldValue := map[string][]interface{}{}
+		for _, v := range val {
+			text := fmt.Sprint(v.Value)
+			if len(text) < 1 {
+				continue // drop empty strings
+			}
+			if iscore {
+				// Scored mode: one entry per value+type pair, weighted by its score.
+				fieldValue[text+v.Type] = []interface{}{v.Score, v.Value}
+				continue
+			}
+			// Unscored mode: weight by frequency; the first sighting counts as 0.
+			if seen := fieldValue[text]; seen == nil {
+				fieldValue[text] = []interface{}{0, v.Value}
+			} else {
+				seen[0] = qu.IntAll(seen[0]) + 1
+			}
+		}
+		objects := make([]*ju.SortObject, 0, len(fieldValue))
+		for k, v := range fieldValue {
+			// Secondary sort key, filled in only for plain string values. A checked
+			// type assertion is used because reflect.TypeOf returns nil for a nil
+			// value, and calling String on that would panic.
+			ValueStr := ""
+			if _, ok := v[1].(string); ok {
+				ValueStr = qu.ObjToString(v[1])
+			}
+			objects = append(objects, &ju.SortObject{
+				Key:      k,
+				Value:    qu.IntAll(v[0]),
+				Object:   v[1],
+				ValueStr: ValueStr,
+			})
+		}
+		values[key] = ju.ExtSort(objects)
+	}
+	return doc, result, _id, values
+}
+
 func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 	defer qu.Catch()
 	//获取审核字段
@@ -1150,7 +1192,7 @@ func (e *ExtractTask) QualityAudit(resulttmp map[string]interface{}) {
 func (e *ExtractTask) RedisMatch(field, fv string, val map[string]interface{}) {
 	defer qu.Catch()
 	i := redis.GetInt(field, field+"_"+fv) //查找redis
-	if i == 0 {                            //reids未找到,执行规则匹配
+	if i == 0 { //reids未找到,执行规则匹配
 		val[field+"_isredis"] = false
 		e.RuleMatch(field, fv, val) //规则匹配
 	} else { //redis找到,打标识存库

+ 21 - 0
src/jy/extract/extractInit.go

@@ -908,6 +908,27 @@ func (e *ExtractTask) InitAuditFields() {
 	}
 }
 
+// InitFile loads the attachment-extraction settings for this task from the
+// "version" collection, using the record whose version equals
+// e.TaskInfo.Version and whose delete flag is false.
+//
+// When the record's "isfiles" is true, e.IsFileField is switched on (this
+// method never switches it off). e.FileFields is replaced by a set built from
+// the string entries of "s_filefileds", each mapped to 1. If no record is
+// found the task is left untouched.
+func (e *ExtractTask) InitFile() {
+	defer qu.Catch()
+	// NOTE(review): the query is assembled by string concatenation, so a Version
+	// containing a double quote would corrupt it. The author left a commented-out
+	// bson.M variant here — confirm whether FindOne accepts a structured query.
+	ve, _ := db.Mgo.FindOne("version", `{"version":"`+e.TaskInfo.Version+`","delete":false}`)
+	if ve == nil {
+		return
+	}
+	// Checked assertions: a value of an unexpected type is ignored instead of
+	// panicking and leaving FileFields unassigned.
+	if enabled, ok := (*ve)["isfiles"].(bool); ok && enabled {
+		e.IsFileField = true
+	}
+	fields := make(map[string]int)
+	if list, ok := (*ve)["s_filefileds"].([]interface{}); ok {
+		for _, item := range list {
+			if name, ok := item.(string); ok {
+				fields[name] = 1
+			}
+		}
+	}
+	e.FileFields = fields
+}
+
 //加载清理任务信息
 func (c *ClearTask) InitClearTaskInfo() {
 	cleartask, _ := db.Mgo.FindById("cleartask", c.Id, nil)

+ 14 - 18
src/jy/extract/extractudp.go

@@ -106,6 +106,8 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 
 	//品牌抽取是否开启
 	ju.IsBrandGoods, _ = ju.Config["brandgoods"].(bool)
+	//附件抽取是否开启
+	ext.InitFile()
 
 	go ext.ResultSave()
 	go ext.BidSave()
@@ -146,12 +148,10 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 					_id := qu.BsonIdToSId(v["_id"])
 					log.Println(_id)
 					var j, jf *ju.Job
-					if ext.IsFileField{
-						if v["projectinfo"] != nil {
-							v["isextFile"] = true
-							j, jf = PreInfo(v)
-						}
-					}else {
+					if ext.IsFileField && v["projectinfo"] != nil {
+						v["isextFile"] = true
+						j, jf = PreInfo(v)
+					} else {
 						j, _ = PreInfo(v)
 					}
 					ext.TaskInfo.ProcessPool <- true
@@ -174,12 +174,10 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 					_id := qu.BsonIdToSId(v["_id"])
 					log.Println(_id)
 					var j, jf *ju.Job
-					if ext.IsFileField{
-						if v["projectinfo"] != nil {
-							v["isextFile"] = true
-							j, jf = PreInfo(v)
-						}
-					}else {
+					if ext.IsFileField && v["projectinfo"] != nil {
+						v["isextFile"] = true
+						j, jf = PreInfo(v)
+					} else {
 						j, _ = PreInfo(v)
 					}
 					ext.TaskInfo.ProcessPool <- true
@@ -216,12 +214,10 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 				_id := qu.BsonIdToSId(v["_id"])
 				log.Println(_id)
 				var j, jf *ju.Job
-				if ext.IsFileField{
-					if v["projectinfo"] != nil {
-						v["isextFile"] = true
-						j, jf = PreInfo(v)
-					}
-				}else {
+				if ext.IsFileField && v["projectinfo"] != nil {
+					v["isextFile"] = true
+					j, jf = PreInfo(v)
+				} else {
 					j, _ = PreInfo(v)
 				}
 				ext.TaskInfo.ProcessPool <- true

+ 1 - 0
src/jy/util/article.go

@@ -24,6 +24,7 @@ type Job struct {
 	HasKey    int                   //是否匹配到table中的标题
 	HasBrand  int                   //有品牌
 	HasGoods  int                   //有商品
+	IsFile    bool                  //有附件
 }
 
 type ExtField struct {