Quellcode durchsuchen

Merge branch 'dev3.2' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.2

wangji vor 6 Jahren
Ursprung
Commit
56c43c2d9d

+ 2 - 1
src/config.json

@@ -38,5 +38,6 @@
         "LaunchTemplateId4": "lt-2ze19qyi8votdjgeq2ma",
         "LaunchTemplateId8": "lt-2zeidqiydzusn7hw7lt8",
         "VSwitchId": "vsw-2ze23am2bl9e3v6rnyhfb"
-    }
+    },
+    "filelength": 100000
 } 

+ 121 - 0
src/extractbrand/src/brand.go

@@ -0,0 +1,121 @@
+package main
+
+import (
+	"log"
+	"regexp"
+	"time"
+	//"qfw/util"
+	"sync"
+
+	. "gopkg.in/mgo.v2/bson"
+)
+
+// wg waits for the worker goroutines started by UdpTask, which assigns a fresh
+// WaitGroup at the start of every task.
+var wg *sync.WaitGroup
+// lock guards the pending bulk-update batch in UdpTask, which assigns a fresh
+// Mutex at the start of every task.
+var lock *sync.Mutex
+// letter matches text that starts with one or more ASCII letters, optionally
+// followed by '&'. InitBrand uses it to decide whether a 3-byte brand is kept.
+var letter *regexp.Regexp = regexp.MustCompile(`^[a-zA-Z]+&?`)
+
+// InitBrand loads the distinct brand names from the brand collection and
+// registers them in the package-level BrandDFA matcher.
+//
+// A name is logged and skipped when it is empty, exactly 1 byte long, longer
+// than 50 bytes, or exactly 3 bytes long without a leading ASCII letter. Group
+// keys that are not strings (for example a null key) are also logged and
+// skipped, so one odd record cannot abort start-up with a type-assertion panic.
+func InitBrand() {
+	// Initialise the db session.
+	sess := brandMgo.GetMgoConn()
+	defer brandMgo.DestoryMongoConn(sess)
+
+	BrandDFA = &DFA{
+		Link: make(map[string]interface{}),
+	}
+	// Query the distinct brands of the brand library.
+	var res []M
+	pipeline := []M{{"$group": M{"_id": "$brand"}}}
+	if err := sess.DB(brandDbname).C(brandCollname).Pipe(pipeline).All(&res); err != nil {
+		log.Println("load brand err:", err)
+	}
+	n := 0
+	for _, b := range res {
+		brand, ok := b["_id"].(string)
+		if !ok {
+			log.Println("err brand:", b["_id"])
+			continue
+		}
+		if len(brand) > 50 || len(brand) <= 1 || (len(brand) == 3 && !letter.MatchString(brand)) {
+			log.Println("err brand:", brand)
+			continue
+		}
+		n++
+		BrandDFA.AddWord(brand) // add the brand to the DFA
+	}
+	log.Println("brand num :", n)
+}
+
+// UdpTask scans the attachment collection for documents whose _id lies in
+// [sid, eid], extracts brand names from each document with GetBrand and adds
+// them to the document's "conbrand" array using $addToSet.
+//
+// At most `pool` documents are processed concurrently. Updates are flushed in
+// bulk whenever more than `savesize` are pending, and once more at the end.
+// sid and eid must be hex ObjectIds: an invalid id is logged and the task is
+// abandoned, because ObjectIdHex panics on malformed input.
+func UdpTask(sid, eid string) {
+	t := time.Now()
+	log.Println("执行任务")
+	if !IsObjectIdHex(sid) || !IsObjectIdHex(eid) {
+		log.Println("invalid id range, sid:", sid, "eid:", eid)
+		return
+	}
+	query := M{"_id": M{"$gte": ObjectIdHex(sid), "$lte": ObjectIdHex(eid)}}
+	// Attachment library.
+	sess := appendixMgo.GetMgoConn()
+	defer appendixMgo.DestoryMongoConn(sess)
+
+	poolSize := make(chan bool, pool)
+	wg = &sync.WaitGroup{}
+	lock = &sync.Mutex{}                   // guards update
+	update := [][]map[string]interface{}{} // pending bulk updates
+
+	data := sess.DB(appendixDbname).C(appendixCollname).Find(query).Sort("_id").Iter()
+	sum := 0
+	for tmp := make(map[string]interface{}); data.Next(tmp); sum++ {
+		if sum%100 == 0 {
+			log.Println("current:", sum)
+		}
+		poolSize <- true
+		wg.Add(1)
+		go func(d map[string]interface{}) {
+			defer func() {
+				<-poolSize
+				wg.Done()
+			}()
+
+			brandArr := GetBrand(d)
+			if len(brandArr) == 0 { // only documents with a matched brand are updated
+				return
+			}
+			// Each entry is a pair: the selector by _id, then the update document.
+			entry := []map[string]interface{}{
+				{"_id": d["_id"]},
+				{
+					"$addToSet": map[string]interface{}{
+						"conbrand": map[string]interface{}{
+							"$each": brandArr,
+						},
+					},
+				},
+			}
+			lock.Lock()
+			defer lock.Unlock()
+			update = append(update, entry)
+			if len(update) > savesize {
+				appendixMgo.UpdateBulk(appendixCollname, update...)
+				update = [][]map[string]interface{}{} // reset after flushing
+			}
+		}(tmp)
+		// Every goroutine owns its map, so decode the next document into a new one.
+		tmp = make(map[string]interface{})
+	}
+	wg.Wait()
+	// Close releases the cursor and reports any error that ended the iteration.
+	if err := data.Close(); err != nil {
+		log.Println("iter err:", err)
+	}
+	lock.Lock()
+	if len(update) > 0 {
+		appendixMgo.UpdateBulk(appendixCollname, update...)
+	}
+	lock.Unlock()
+	log.Println("--task over--", time.Since(t).Seconds())
+}
+
+// GetBrand returns every brand that BrandDFA finds in the textual content of
+// the attachments stored under data["projectinfo"]["attachments"].
+//
+// Missing or unexpectedly shaped fields are skipped rather than asserted, so a
+// document without attachments yields a nil slice instead of a panic. The
+// result may contain duplicates.
+func GetBrand(data map[string]interface{}) (brandArr []string) {
+	projectinfo, ok := data["projectinfo"].(map[string]interface{})
+	if !ok {
+		return nil
+	}
+	attachments, ok := projectinfo["attachments"].(map[string]interface{})
+	if !ok {
+		return nil
+	}
+	for _, m := range attachments {
+		val, ok := m.(map[string]interface{})
+		if !ok {
+			continue
+		}
+		if content, ok := val["content"].(string); ok { // attachment text
+			brandArr = append(brandArr, BrandDFA.CheckSensitiveWord(content)...)
+		}
+	}
+	return brandArr
+}

+ 17 - 0
src/extractbrand/src/config.json

@@ -0,0 +1,17 @@
+{
+	"udpport":"1482",
+	"pool":6,
+	"savesize":200,
+	"brand":{
+		"mgodb":"192.168.3.207:27082",
+		"dbsize": 2,
+		"dbname":"spider",
+		"collname":"JD_commodity"
+	},
+	"appendix":{
+		"mgodb":"192.168.3.207:27082",
+		"dbsize": 2,
+		"dbname":"mxs",
+		"collname":"bidding_file"
+	}
+}

+ 59 - 0
src/extractbrand/src/dfa.go

@@ -0,0 +1,59 @@
+package main
+
+import (
+	"qfw/util"
+)
+
+// BrandDFA is the package-wide brand matcher.
+var BrandDFA *DFA
+
+// Marker keys stored inside trie nodes next to the outgoing edges.
+//
+// Edges are keyed by one-byte strings, so every marker must be longer than one
+// byte. A one-byte marker such as "K" would collide with the edge for the byte
+// 'K' and corrupt the trie: adding "A" and then "AK" would hit a non-node value.
+const (
+	dfaEndKey  = "YN"  // "1" when a word ends at this node, "0" otherwise
+	dfaWordKey = "KEY" // the complete word ending at this node
+)
+
+// DFA is a byte-wise trie used for multi-pattern substring matching.
+//
+// Link is the root node. Each node maps a one-byte string to a pointer to the
+// child node, and also carries the marker entries described above.
+type DFA struct {
+	Link map[string]interface{}
+}
+
+// AddWord adds keys to the trie and records each word at its terminal node.
+func (d *DFA) AddWord(keys ...string) {
+	d.AddWordAll(true, keys...)
+}
+
+// AddWordAll adds keys to the trie. When haskey is true the original word is
+// stored at its terminal node so CheckSensitiveWord can return it. Empty keys
+// are ignored.
+func (d *DFA) AddWordAll(haskey bool, keys ...string) {
+	if d.Link == nil {
+		d.Link = make(map[string]interface{})
+	}
+	for _, key := range keys {
+		if key == "" {
+			continue
+		}
+		node := &d.Link
+		for i := 0; i < len(key); i++ {
+			kc := key[i : i+1]
+			next, ok := (*node)[kc].(*map[string]interface{})
+			if !ok || next == nil {
+				child := map[string]interface{}{dfaEndKey: "0"} // not a terminal node yet
+				next = &child
+				(*node)[kc] = next
+			}
+			node = next
+		}
+		(*node)[dfaEndKey] = "1" // a word ends here
+		if haskey {
+			(*node)[dfaWordKey] = key
+		}
+	}
+}
+
+// CheckSensitiveWord returns every stored word that occurs in src, one entry
+// per occurrence, including overlapping matches and words that are prefixes of
+// longer matches. Words added with haskey=false are reported as "". The result
+// is never nil.
+func (d *DFA) CheckSensitiveWord(src string) []string {
+	res := make([]string, 0)
+	for j := 0; j < len(src); j++ {
+		node := &d.Link
+		for i := j; i < len(src); i++ {
+			next, _ := (*node)[src[i:i+1]].(*map[string]interface{})
+			if next == nil { // no edge for this byte: no longer match starts at j
+				break
+			}
+			node = next
+			if "1" == util.ObjToString((*node)[dfaEndKey]) {
+				res = append(res, util.ObjToString((*node)[dfaWordKey]))
+			}
+		}
+	}
+	return res
+}

+ 96 - 0
src/extractbrand/src/main.go

@@ -0,0 +1,96 @@
+package main
+
+import (
+	"encoding/json"
+	"log"
+	mu "mfw/util"
+	"net"
+	"qfw/util"
+	. "qfw/util/mongodb"
+	"time"
+)
+
+var (
+	Sysconfig   map[string]interface{} // configuration read from config.json
+	brandMgo    *MongodbSim            // MongoDB handle for the brand library
+	appendixMgo *MongodbSim            // MongoDB handle for the attachment library
+	udpclient   mu.UdpClient           // UDP client
+	udpport     string                 // UDP port
+	pool        int                    // number of concurrent workers
+	savesize    int                    // pending-update count above which UdpTask flushes a bulk update
+	// Brand library settings.
+	brandMgodb    string
+	brandDbname   string
+	brandDbsize   int
+	brandCollname string
+	// Attachment library settings.
+	appendixMgodb    string
+	appendixDbname   string
+	appendixDbsize   int
+	appendixCollname string
+)
+
+// init reads config.json into Sysconfig, copies the settings into the
+// package-level variables, initialises the brand and attachment MongoDB pools
+// and finally loads the brand library through InitBrand.
+//
+// The "brand" and "appendix" sections are type-asserted without a check, so a
+// config file lacking either section makes init panic.
+func init() {
+	util.ReadConfig("config.json", &Sysconfig)
+	udpport, _ = Sysconfig["udpport"].(string)
+	pool = util.IntAllDef(Sysconfig["pool"], 5)
+	savesize = util.IntAllDef(Sysconfig["savesize"], 200)
+	// Brand library.
+	brand := Sysconfig["brand"].(map[string]interface{})
+	brandMgodb, _ = brand["mgodb"].(string)
+	brandDbname, _ = brand["dbname"].(string)
+	brandDbsize = util.IntAllDef(brand["dbsize"], 5)
+	brandCollname, _ = brand["collname"].(string)
+	brandMgo = &MongodbSim{
+		MongodbAddr: brandMgodb,
+		Size:        brandDbsize,
+		DbName:      brandDbname,
+	}
+	brandMgo.InitPool()
+	// Attachment library.
+	appendix := Sysconfig["appendix"].(map[string]interface{})
+	appendixMgodb, _ = appendix["mgodb"].(string)
+	appendixDbname, _ = appendix["dbname"].(string)
+	appendixDbsize = util.IntAllDef(appendix["dbsize"], 5)
+	appendixCollname, _ = appendix["collname"].(string)
+	appendixMgo = &MongodbSim{
+		MongodbAddr: appendixMgodb,
+		Size:        appendixDbsize,
+		DbName:      appendixDbname,
+	}
+
+	appendixMgo.InitPool()
+	// Load the brand library into the DFA.
+	InitBrand()
+
+}
+// main creates the UDP client bound to udpport, registers processUdpMsg as the
+// message handler and then sleeps so the process stays alive.
+func main() {
+	log.Println("udpport", udpport)
+	udpclient = mu.UdpClient{Local: ":" + udpport, BufSize: 1024}
+	udpclient.Listen(processUdpMsg)
+	log.Println("Udp服务监听", udpport)
+	// Block the main goroutine; 99999 hours is effectively forever.
+	time.Sleep(99999 * time.Hour)
+}
+
+// processUdpMsg handles one incoming UDP message.
+//
+// For mu.OP_TYPE_DATA (data from the previous node) the payload is decoded as
+// JSON. A decode error is reported back to the sender; otherwise the message is
+// acknowledged with "udpok" and its "gtid"/"lteid" fields are passed to
+// UdpTask. For mu.OP_NOOP (reply from the next node) a non-empty payload is
+// logged. Any other action is ignored.
+func processUdpMsg(act byte, data []byte, ra *net.UDPAddr) {
+	defer util.Catch()
+
+	if act == mu.OP_TYPE_DATA {
+		var mapInfo map[string]interface{}
+		err := json.Unmarshal(data, &mapInfo)
+		log.Println("err:", err, "mapInfo:", mapInfo)
+		if err != nil {
+			udpclient.WriteUdp([]byte("err:"+err.Error()), mu.OP_NOOP, ra)
+			return
+		}
+		if mapInfo == nil {
+			return
+		}
+		sid, _ := mapInfo["gtid"].(string)
+		eid, _ := mapInfo["lteid"].(string)
+		udpclient.WriteUdp([]byte("udpok"), mu.OP_NOOP, ra)
+		UdpTask(sid, eid)
+		return
+	}
+
+	if act == mu.OP_NOOP {
+		if reply := string(data); reply != "" {
+			log.Println("ok:", reply)
+		}
+	}
+}

+ 3 - 3
src/jy/admin/rulecheck.go

@@ -286,7 +286,7 @@ func checkCoreReg(field, content, ruleText string) map[string]string {
 //lua脚本前置过滤验证
 func checkPreScript(code, name, infoid, script string) map[string]interface{} {
 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j := extract.PreInfo(*doc)
+	j,_ := extract.PreInfo(*doc)
 	delete(*j.Data, "contenthtml")
 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
 	lua.Block = j.Block
@@ -306,7 +306,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 	e.InitRuleCore()
 	e.InitTag()
 	tmp, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j := extract.PreInfo(*tmp)
+	j,_ := extract.PreInfo(*tmp)
 	doc := *j.Data
 	//全局前置规则,结果覆盖doc属性
 	for _, v := range e.RulePres {
@@ -350,7 +350,7 @@ func checkBackScript(table, code, name, version, infoid, script string, alone bo
 //lua脚本抽取验证
 func checkCoreScript(code, name, infoid, script string) interface{} {
 	doc, _ := Mgo.FindById("bidding", infoid, extract.Fields)
-	j := extract.PreInfo(*doc)
+	j ,_ := extract.PreInfo(*doc)
 	delete(*j.Data, "contenthtml")
 	lua := ju.LuaScript{Code: code, Name: name, Doc: *j.Data, Script: script}
 	lua.Block = j.Block

+ 9 - 1
src/jy/extract/exportask.go

@@ -66,7 +66,15 @@ func extractAndExport(v string, t map[string]interface{}) {
 		if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
 			continue
 		}
-		j, jf := PreInfo(v, false)
+		var j, jf *ju.Job
+		if e.IsFileField{
+			if v["projectinfo"] != nil {
+				v["isextFile"] = true
+				j, jf = PreInfo(v)
+			}
+		}else {
+			j, _ = PreInfo(v)
+		}
 		e.TaskInfo.ProcessPool <- true
 		go e.ExtractProcess(j, jf)
 	}

+ 168 - 7
src/jy/extract/extract.go

@@ -1,6 +1,7 @@
 package extract
 
 import (
+	"bytes"
 	"encoding/json"
 	"fmt"
 	"jy/clear"
@@ -15,6 +16,7 @@ import (
 	"strconv"
 	"sync"
 	"time"
+	"unicode/utf8"
 
 	"gopkg.in/mgo.v2/bson"
 )
@@ -27,7 +29,7 @@ var (
 	ClearTaskList map[string]*ClearTask                  //清理任务列表
 	saveLimit     = 200                                  //抽取日志批量保存
 	PageSize      = 5000                                 //查询分页
-	Fields        = `{"title":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1}`
+	Fields        = `{"title":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
 
@@ -77,8 +79,15 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 			if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
 				continue
 			}
-			//log.Println(v["_id"])
-			j, jf := PreInfo(v, false)
+			var j, jf *ju.Job
+			if ext.IsFileField{
+				if v["projectinfo"] != nil {
+					v["isextFile"] = true
+					j, jf = PreInfo(v)
+				}
+			}else {
+				j, _ = PreInfo(v)
+			}
 			ext.TaskInfo.ProcessPool <- true
 			go ext.ExtractProcess(j, jf)
 		}
@@ -171,7 +180,15 @@ func RunExtractTask(taskId string) {
 			if !ext.IsRun {
 				break
 			}
-			j, jf := PreInfo(v, false)
+			var j, jf *ju.Job
+			if ext.IsFileField{
+				if v["projectinfo"] != nil {
+					v["isextFile"] = true
+					j, jf = PreInfo(v)
+				}
+			}else {
+				j, _ = PreInfo(v)
+			}
 			ext.TaskInfo.ProcessPool <- true
 			go ext.ExtractProcess(j, jf)
 			ext.TaskInfo.LastExtId = _id
@@ -186,8 +203,13 @@ func RunExtractTask(taskId string) {
 }
 
 //信息预处理
-func PreInfo(doc map[string]interface{}, isextFile bool) (j, jf *ju.Job) {
+func PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
 	defer qu.Catch()
+	//判断是否有附件这个字段
+	var isextFile bool
+	if doc["isextFile"] != nil{
+		isextFile = doc["isextFile"].(bool)
+	}
 	detail := ""
 	d1, _ := doc["detail"].(string)
 	d2, _ := doc["contenthtml"].(string)
@@ -199,7 +221,10 @@ func PreInfo(doc map[string]interface{}, isextFile bool) (j, jf *ju.Job) {
 	detail = ju.CutLableStr(detail)
 	detail = cut.ClearHtml(detail)
 	doc["detail"] = detail
-	doc["detailfile"] = "" //附件文本堆一起(后期可以考虑,分开处理)
+
+	if isextFile {
+		file2text(&doc)  //附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
+	}
 	toptype := qu.ObjToString(doc["toptype"])
 	if qu.ObjToString(doc["type"]) == "bid" {
 		toptype = "结果"
@@ -246,13 +271,53 @@ func PreInfo(doc map[string]interface{}, isextFile bool) (j, jf *ju.Job) {
 	return j, jf
 }
 
+// file2text concatenates the text of every attachment found under
+// doc["projectinfo"]["attachments"] and stores the result in
+// doc["detailfile"].
+//
+// Each piece of text is followed by " \n". An attachment's "content" may be a
+// plain string or a slice of maps whose "context" entries hold the text;
+// entries of any other shape are skipped. Attachments are visited in map
+// iteration order, so the order of the pieces is not deterministic.
+// doc["detailfile"] is only written when the combined text is shorter, in
+// runes, than the "filelength" setting (default 100000); longer text leaves
+// the field untouched.
+func file2text(doc *map[string]interface{}) {
+	var strfileinfo bytes.Buffer
+	// Failed assertions yield nil maps, which are safe to index and range over.
+	projectinfo, _ := (*doc)["projectinfo"].(map[string]interface{})
+	attachments, _ := projectinfo["attachments"].(map[string]interface{})
+	for _, vaatt := range attachments {
+		fileinfo, ok := vaatt.(map[string]interface{})
+		if !ok || qu.ObjToString(fileinfo["content"]) == "" {
+			continue
+		}
+		// The buffer is local to this call, so no locking is needed.
+		switch content := fileinfo["content"].(type) {
+		case string:
+			strfileinfo.WriteString(content)
+			strfileinfo.WriteString(" \n")
+		case []map[string]interface{}:
+			for _, fv := range content {
+				// Checked assertion: a non-string "context" is skipped, not a panic.
+				if text, ok := fv["context"].(string); ok {
+					strfileinfo.WriteString(text)
+					strfileinfo.WriteString(" \n")
+				}
+			}
+		}
+	}
+	// String copies the buffer, so take it once.
+	text := strfileinfo.String()
+	if utf8.RuneCountInString(text) < qu.IntAllDef(ju.Config["filelength"], 100000) {
+		(*doc)["detailfile"] = text // all attachment text joined together
+	}
+}
+
 //抽取
 func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
 	qu.Try(func() {
 		doc := *j.Data
+		docfile := make(map[string]interface{})
+		if jf != nil{
+			docfile = *jf.Data
+			docfile["dockey"]= "detailfile"
+		}
 		//全局前置规则,结果覆盖doc属性
 		for _, v := range e.RulePres {
 			doc = ExtRegPre(doc, j, v, e.TaskInfo)
+			if jf != nil{
+				docfile = ExtRegPre(docfile, jf, v, e.TaskInfo)
+			}
 		}
 		//抽取规则
 		for _, vc := range e.RuleCores {
@@ -286,9 +351,47 @@ func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
 			}
 			//log.Println("抽取-后置规则", tmp)
 		}
+		//抽取规则-附件
+		if jf != nil{
+			for _, vc := range e.RuleCores {
+				tmp := ju.DeepCopy(docfile).(map[string]interface{})
+				//是否进入逻辑
+				if !ju.Logic(vc.LuaLogic, tmp) {
+					continue
+				}
+				//抽取-前置规则
+				for _, v := range vc.RulePres {
+					tmp = ExtRegPre(tmp, jf, v, e.TaskInfo)
+				}
+				//log.Println("抽取-前置规则", tmp)
+
+				//抽取-规则
+				for _, v := range vc.RuleCores {
+					ExtRegCore(vc.ExtFrom, tmp, jf, v, e)
+				}
+				//log.Println("抽取-规则", tmp)
+
+				//项目名称未能抽取到,标题来凑
+				if vc.Field == "projectname" {
+					if len(jf.Result[vc.Field]) < 1 {
+						jf.Result[vc.Field] = append(jf.Result[vc.Field], &ju.ExtField{vc.Field, "title", "title", "regexp", "title", vc.ExtFrom, jf.Title, 0})
+					}
+				}
+
+				//抽取-后置规则
+				for _, v := range vc.RuleBacks {
+					ExtRegBack(jf, v, e.TaskInfo)
+				}
+				//log.Println("抽取-后置规则", tmp)
+			}
+		}
+
 		//全局后置规则
 		for _, v := range e.RuleBacks {
 			ExtRegBack(j, v, e.TaskInfo)
+			if jf != nil {
+				ExtRegBack(jf, v, e.TaskInfo)
+			}
 		}
 		//候选人加入
 		if len(j.Winnerorder) > 0 {
@@ -314,6 +417,32 @@ func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
 			}
 			j.Result["winner"] = winners
 		}
+		//候选人加入-附件
+		if jf != nil{
+			if len(jf.Winnerorder) > 0 {
+				winner := &ju.ExtField{
+					Field:     "winner",
+					Code:      "",
+					RuleText:  "",
+					Type:      "winnerorder",
+					MatchType: "winnerorder",
+					ExtFrom:   "",
+					Value:     jf.Winnerorder[0]["entname"],
+					Score:     0,
+				}
+				if len([]rune(qu.ObjToString(jf.Winnerorder[0]["entname"]))) < 4 {
+					winner.Score = -5
+				}
+				winners := jf.Result["winner"]
+				if winners != nil {
+					winners = append(winners, winner)
+				} else {
+					winners = []*ju.ExtField{}
+					winners = append(winners, winner)
+				}
+				jf.Result["winner"] = winners
+			}
+		}
 		//函数清理
 		for key, val := range j.Result {
 			for _, v := range val {
@@ -333,11 +462,38 @@ func (e *ExtractTask) ExtractProcess(j, jf *ju.Job) {
 				lock.Unlock()
 			}
 		}
+		//函数清理-附件
+		if jf != nil{
+			for key, val := range jf.Result {
+				for _, v := range val {
+					lock.Lock()
+					cfn := e.ClearFn[key]
+					lock.Unlock()
+					data := clear.DoClearFn(cfn, []interface{}{v.Value, jf.Content})
+					v.Value = data[0]
+					//清理特殊符号
+					lock.Lock()
+					if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
+						clear.MesField[key] != nil {
+						text := qu.ObjToString(v.Value)
+						text = clear.OtherClean(key, text)
+						v.Value = text
+					}
+					lock.Unlock()
+				}
+			}
+		}
 		PackageDetail(j, e) //处理分包信息
+		if jf != nil{
+			PackageDetail(jf, e) //处理分包信息-附件
+		}
 		//		bs, _ := json.Marshal(j.Result)
 		//		log.Println("抽取结果", j.Title, j.SourceMid, string(bs))
 		//分析抽取结果并保存 todo
 		AnalysisSaveResult(j, e)
+		if jf != nil{
+			AnalysisSaveResult(jf, e) //分析抽取结果并保存-附件
+		}
 	}, func(err interface{}) {
 		log.Println("ExtractProcess err", err)
 	})
@@ -360,7 +516,12 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
 		}
 		AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志
 	} else {
-		key := qu.If(in.Field == "", "detail", in.Field).(string)
+		var key string
+		if doc["dockey"]== nil{
+			key = qu.If(in.Field == "", "detail", in.Field).(string)
+		}else {
+			key = qu.If(in.Field == "", "detailfile", in.Field).(string)
+		}
 		text := qu.ObjToString(doc[key])
 		extinfo[key] = in.RegPreBac.Reg.ReplaceAllString(text, "")
 		doc[key] = extinfo[key]                                      //结果覆盖原doc

+ 3 - 0
src/jy/extract/extractInit.go

@@ -66,6 +66,9 @@ type ExtractTask struct {
 	IsExtractCity bool                //是否开启城市抽取
 	Fields        map[string]int      //抽取属性组
 
+	IsFileField       bool      //是否开启附件抽取
+	FileFields        map[string]int      //抽取附件属性组
+
 	ResultChanel chan bool                  //抽取结果详情
 	ResultArr    [][]map[string]interface{} //抽取结果详情
 	BidChanel    chan bool                  //抽取结果

+ 27 - 3
src/jy/extract/extractudp.go

@@ -145,7 +145,15 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 					}
 					_id := qu.BsonIdToSId(v["_id"])
 					log.Println(_id)
-					j, jf := PreInfo(v, false)
+					var j, jf *ju.Job
+					if ext.IsFileField{
+						if v["projectinfo"] != nil {
+							v["isextFile"] = true
+							j, jf = PreInfo(v)
+						}
+					}else {
+						j, _ = PreInfo(v)
+					}
 					ext.TaskInfo.ProcessPool <- true
 					go ext.ExtractProcess(j, jf)
 					sid = _id
@@ -165,7 +173,15 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 					}
 					_id := qu.BsonIdToSId(v["_id"])
 					log.Println(_id)
-					j, jf := PreInfo(v, false)
+					var j, jf *ju.Job
+					if ext.IsFileField{
+						if v["projectinfo"] != nil {
+							v["isextFile"] = true
+							j, jf = PreInfo(v)
+						}
+					}else {
+						j, _ = PreInfo(v)
+					}
 					ext.TaskInfo.ProcessPool <- true
 					go ext.ExtractProcess(j, jf)
 					sidback = _id
@@ -199,7 +215,15 @@ func ExtractByUdp(sid, eid string, instanceId ...string) {
 				}
 				_id := qu.BsonIdToSId(v["_id"])
 				log.Println(_id)
-				j, jf := PreInfo(v, false)
+				var j, jf *ju.Job
+				if ext.IsFileField{
+					if v["projectinfo"] != nil {
+						v["isextFile"] = true
+						j, jf = PreInfo(v)
+					}
+				}else {
+					j, _ = PreInfo(v)
+				}
 				ext.TaskInfo.ProcessPool <- true
 				go ext.ExtractProcess(j, jf)
 				sid = _id