瀏覽代碼

Merge branch 'dev3.4' of http://192.168.3.207:10080/qmx/jy-data-extract into dev3.4

apple 5 年之前
父節點
當前提交
79bd7e07d4

+ 2 - 2
src/config.json

@@ -2,8 +2,8 @@
     "port": "9090",
     "mgodb": "192.168.3.207:27092",
     "dbsize": 3,
-    "dbname": "extract_v3",
-    "redis": "qyk_redis=127.0.0.1:6379",
+    "dbname": "extract_v3xs",
+    "redis": "qyk_redis=192.168.3.207:6379",
     "elasticsearch": "http://127.0.0.1:9800",
     "elasticsearch_index": "winner_enterprise_tmp",
     "elasticsearch_type": "winnerent",

+ 1 - 1
src/jy/clear/clear.go

@@ -53,7 +53,7 @@ func DoClearFn(clear []string, data []interface{}) []interface{} {
 }
 
 // PhoneReg matches a run of one or more phone-number-like digit groups.
 // Each group may start with a bracketed 3-4 digit prefix, joins its digit
 // runs with dash-like separators (×―—-), may carry a "转" extension of 3-4
 // digits, and may be followed by list separators or whitespace before the
 // next group. A masked form (digits, 3-4 asterisks, digits) is also accepted.
 // This change widens the optional suffix that follows a 6-12 digit run in
 // the first alternative from 3-4 digits to 3-5 digits.
-var PhoneReg = regexp.MustCompile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,4})?|\\d{3,5}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{3,})?|\\d{3,4}\\*{3,4}\\d{3,4}|\\d{3,4}[\u3000\u2003\u00a0\\s]*\\d{4,5}[\u3000\u2003\u00a0\\s]*\\d{3,4}|(\\d{2,}[×―—-\\-])+\\d{2,}[×―—-\\-]+(\\d{3,})+|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")
+var PhoneReg = regexp.MustCompile("((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,5})?|\\d{3,5}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{3,})?|\\d{3,4}\\*{3,4}\\d{3,4}|\\d{3,4}[\u3000\u2003\u00a0\\s]*\\d{4,5}[\u3000\u2003\u00a0\\s]*\\d{3,4}|(\\d{2,}[×―—-\\-])+\\d{2,}[×―—-\\-]+(\\d{3,})+|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")
 
 func GetPhone(data []interface{}) []interface{} {
 	data[0] = PhoneReg.FindString(fmt.Sprint(data[0]))

+ 0 - 95
src/jy/extract/extpackage.go

@@ -213,101 +213,6 @@ func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
 						sonJobResult["name"] = j.Title
 					}
 				}
-				//分包暂不参与选举
-				/*
-					for k, tags := range e.Tag {
-					L:
-						for _, tag := range tags {
-							if pkg.TableKV != nil {
-								for key, val := range pkg.TableKV.Kv {
-									if tag.Key == key {
-										clearmap[k] = false
-										var tmpval interface{}
-										if len(e.ClearFn[k]) > 0 {
-											data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
-											tmpval = data[0]
-										} else {
-											tmpval = val
-										}
-										sonJobResult[k] = tmpval
-										if packagenum == 1 {
-											field := &ju.ExtField{
-												Field:     k,
-												Code:      "package",
-												RuleText:  "package",
-												Type:      "table",
-												MatchType: "tag_string",
-												ExtFrom:   "package",
-												Value:     tmpval,
-												Score:     0,
-											}
-											j.Result[k] = append(j.Result[k], field)
-										}
-										break L
-									}
-								}
-							}
-							if pkg.ColonKV != nil {
-								for key, val := range pkg.ColonKV.Kv {
-									if tag.Key == key {
-										clearmap[k] = true
-										var tmpval interface{}
-										if len(e.ClearFn[k]) > 0 {
-											data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
-											tmpval = data[0]
-										} else {
-											tmpval = val
-										}
-										sonJobResult[k] = tmpval
-										if packagenum == 1 {
-											field := &ju.ExtField{
-												Field:     k,
-												Code:      "package",
-												RuleText:  "package",
-												Type:      "colon",
-												MatchType: "tag_string",
-												ExtFrom:   "package",
-												Value:     tmpval,
-												Score:     0,
-											}
-											j.Result[k] = append(j.Result[k], field)
-										}
-										break L
-									}
-								}
-							}
-							if pkg.SpaceKV != nil {
-								for key, val := range pkg.SpaceKV.Kv {
-									if tag.Key == key {
-										clearmap[k] = true
-										var tmpval interface{}
-										if len(e.ClearFn[k]) > 0 {
-											data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
-											tmpval = data[0]
-										} else {
-											tmpval = val
-										}
-										sonJobResult[k] = tmpval
-										if packagenum == 1 {
-											field := &ju.ExtField{
-												Field:     k,
-												Code:      "package",
-												RuleText:  "package",
-												Type:      "space",
-												MatchType: "tag_string",
-												ExtFrom:   "package",
-												Value:     tmpval,
-												Score:     0,
-											}
-											j.Result[k] = append(j.Result[k], field)
-										}
-										break L
-									}
-								}
-							}
-						}
-					}
-				*/
 				//如果有中标候选人排序,优先用第一中标候选人的中标单位和中标金额覆盖该包里面相应的字段的值
 				if pkg.WinnerOrder != nil && len(pkg.WinnerOrder) > 0 {
 					firstWinnerOrder := pkg.WinnerOrder[0]

+ 24 - 20
src/jy/extract/extract.go

@@ -27,12 +27,12 @@ import (
 // Package-level state for the extraction code. The -/+ pairs below only
 // realign the declarations (gofmt); names, types and values are unchanged.
 var (
 	// Read/write locks used by the extraction code.
 	// NOTE(review): which shared data each lock guards is not visible in this
 	// hunk; confirm against the call sites.
 	lock, lockrule, lockclear, locktag, blocktag sync.RWMutex
 
-	cut     = ju.NewCut()                          // extracts the main text and cleans it
-	ExtLogs map[*TaskInfo][]map[string]interface{} // extraction logs
-	TaskList      map[string]*ExtractTask // task list
-	ClearTaskList map[string]*ClearTask   // cleanup task list
-	saveLimit     = 100                   // batch size for saving extraction logs
-	PageSize      = 5000                  // query page size
+	cut           = ju.NewCut()                          // extracts the main text and cleans it
+	ExtLogs       map[*TaskInfo][]map[string]interface{} // extraction logs
+	TaskList      map[string]*ExtractTask                // task list
+	ClearTaskList map[string]*ClearTask                  // cleanup task list
+	saveLimit     = 100                                  // batch size for saving extraction logs
+	PageSize      = 5000                                 // query page size
 	// Fields and Fields2 are JSON object literals mapping document field names
 	// to 1 (presumably query projections; confirm at the use sites).
 	Fields        = `{"title":1,"summary":1,"detail":1,"contenthtml":1,"site":1,"spidercode":1,"toptype":1,"subtype":1,"bidstatus":1,"area":1,"city":1,"comeintime":1,"publishtime":1,"sensitive":1,"projectinfo":1,"jsondata":1,"href":1,"infoformat":1,"attach_text":1,"dataging":1}`
 	Fields2       = `{"budget":1,"bidamount":1,"title":1,"projectname":1,"winner":1}`
 )
@@ -331,7 +331,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 		Result:    map[string][]*ju.ExtField{},
 		BuyerAddr: qu.ObjToString(doc["buyeraddr"]),
 		RuleBlock: e.RuleBlock,
-		Dataging: qu.IntAll(doc["dataging"]),
+		Dataging:  qu.IntAll(doc["dataging"]),
 	}
 	if (j.Jsondata != nil || (*j.Jsondata) != nil) && (*j.Jsondata)["jsoncontent"] != nil {
 		delete((*j.Jsondata), "jsoncontent")
@@ -353,7 +353,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 			BuyerAddr:      qu.ObjToString(doc["buyeraddr"]),
 			RuleBlock:      e.RuleBlock,
 			IsFile:         isextFile,
-			Dataging: qu.IntAll(doc["dataging"]),
+			Dataging:       qu.IntAll(doc["dataging"]),
 		}
 		if (jf.Jsondata != nil || (*jf.Jsondata) != nil) && (*jf.Jsondata)["jsoncontent"] != nil {
 			delete((*jf.Jsondata), "jsoncontent")
@@ -402,6 +402,11 @@ func file2text(doc *map[string]interface{}) {
 				for _, fileinfo := range fileinfos {
 					if ff, ok := fileinfo.(map[string]interface{}); ok {
 						attach_url := qu.ObjToString(ff["attach_url"])
+						//if utf8.RuneCountInString(tmpstr+attach_url) < qu.IntAllDef(ju.Config["filelength"], 100000) {
+						//	tmpstr += attach_url + "\n"
+						//} else {
+						//	break
+						//}
 						bs := ju.OssGetObject(attach_url)
 						if utf8.RuneCountInString(tmpstr+bs) < qu.IntAllDef(ju.Config["filelength"], 100000) {
 							tmpstr += bs + "\n"
@@ -579,11 +584,9 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
 		//函数清理
 		for key, val := range j.Result {
 			for i, v := range val {
-				// if v.ExtFrom == "title"&& v.Field == "buyer"{
-				// 	qu.Debug("title---",v.Value)
-				// }else if v.Field == "buyer"{
-				// 	qu.Debug("text---",v.Value)
-				// }
+				if v.Field == "projectname" && v.Type == "table" {
+					break
+				}
 				lockclear.Lock()
 				var cfn = []string{}
 				if isSite {
@@ -1441,9 +1444,9 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo, vc *RuleCore) {
 						continue
 					}
 					//table抽取到的数据不清理
-					//					if v.Type == "table" && v.Field != "projectname" {
-					//						continue
-					//					}
+					if v.Type == "table" && v.Field == "projectname" {
+						return
+					}
 					text := qu.ObjToString(v.Value)
 					if text != "" {
 						text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
@@ -1471,8 +1474,9 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo, vc *RuleCore) {
 			for key, tmp := range j.Result {
 				exts := []interface{}{}
 				for k, v := range tmp {
-					if v.Type == "table" { //table抽取到的数据不清理
-						continue
+					//table抽取到的数据不清理
+					if v.Type == "table" && v.Field == "projectname" {
+						return
 					}
 					text := qu.ObjToString(v.Value)
 					if text != "" {
@@ -1683,6 +1687,7 @@ var clearWinnerReg = regexp.MustCompile("名称|施工|拟定供应商名称|:
 //分析抽取结果并保存
 func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 	qu.Try(func() {
+		
 		//重新取出清理过后的中标候选人
 		resetWinnerorder(j)
 		doc, result, _id := funcAnalysis(j, e)
@@ -1782,7 +1787,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				savewinner = RemoveReplicaSliceString(savewinner)
 				tmp["s_winner"] = strings.Join(savewinner, ",")
 			}
-
 		} else if tmp["winner"] != nil && tmp["winner"] != "" {
 			//没有分包取winner
 			tmp["s_winner"] = tmp["winner"]
@@ -2084,7 +2088,7 @@ func rangeBlockToJson(j *ju.Block, tmpblock ju.TmpBlock) (b *ju.TmpBlock) {
 
 // delFiled reports whether k is one of the field names listed in the
 // expression below (detailfile, summary, detail, contenthtml, site,
 // spidercode, projectinfo, jsondata). The original note describes these as
 // redundant fields to be removed. This change only reformats the
 // expression's spacing; the set of names is unchanged.
 func delFiled(k string) bool {
-	return k=="detailfile"||k == "summary" || k == "detail" || k == "contenthtml" || k == "site" || k == "spidercode" || k == "projectinfo" || k == "jsondata"
+	return k == "detailfile" || k == "summary" || k == "detail" || k == "contenthtml" || k == "site" || k == "spidercode" || k == "projectinfo" || k == "jsondata"
 }
 
 func funcAnalysis(j *ju.Job, e *ExtractTask) (*map[string]interface{}, map[string][]*ju.ExtField, string) {

+ 16 - 0
src/jy/extract/score_jsondata.go

@@ -94,6 +94,22 @@ func JsonDataMergeProcessing(j *util.Job, e *ExtractTask) map[string][]*util.Ext
 				//AddExtLog("clear", j.SourceMid, (*j.Jsondata)[v], newNum[0], &RegLuaInfo{ "JsonData_"+v, "", v, "", false, nil, nil}, e.TaskInfo) //抽取日志
 				//}
 				continue
+			}else if v == "bidopentime"{
+				lockclear.Lock()
+				cfn := e.ClearFn[v]
+				lockclear.Unlock()
+				if len(cfn) == 0 {
+					continue
+				}
+				extFields := make([]*util.ExtField, 0)
+				if bt,ok :=(*j.Jsondata)[v].(float64);ok && bt>0{
+					extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: bt, Score: 0.1})
+				}else {
+					newNum := clear.DoClearFn(cfn, []interface{}{(*j.Jsondata)[v], ""})
+					extFields = append(extFields, &util.ExtField{Code: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), Field: v, ExtFrom: "JsonData_" + v + "_" + fmt.Sprint(jdextweight), SourceValue: (*j.Jsondata)[v], Value: newNum[0], Score: 0.1})
+				}
+				j.Result[v] = extFields
+				continue
 			}
 			vv := strings.TrimSpace(util2.ObjToString((*j.Jsondata)[v]))
 			if vv == "" || strings.Contains(vv, "详见公告") {

+ 2 - 2
src/jy/pretreated/analykv.go

@@ -10,7 +10,7 @@ import (
 var Han = regexp.MustCompile("[\\p{Han}]")
 var Han1 = regexp.MustCompile("[^:;;,:,。. \u3000\u2003\u00a0\\s]")
 var Han2 = regexp.MustCompile("[^:;;,:,。.]")
-var Key = regexp.MustCompile("[::]")
+var Key = regexp.MustCompile("[::]")
 var Time = regexp.MustCompile("[\\d]")
 var dh = regexp.MustCompile("[,,.]")
 var space = regexp.MustCompile("[\\s\\n \u3000\u2003\u00a0]+")
@@ -401,7 +401,7 @@ func keydetail(k, v string, m *SortMap, tag string, pos int, strs [][]string, ma
 				}
 			}
 		} else if len([]rune(k)) == 2 {
-			if filter_zbje_jd.MatchString(k) { //钱
+			if !filter_zbje_jd.MatchString(k) { //钱
 				if tag != "" && filter_tag_zb.MatchString(tag) {
 					k = "中标" + k
 				} else {

+ 35 - 0
src/jy/pretreated/division.go

@@ -253,6 +253,7 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock, isSite
 		title = filterTitle(title)
 		//分割标题 [和及]。。。 参与
 		splitTitles := ProcTitle(title)
+		blockText = mergetext(splitTitles, blockText)
 		block := &util.Block{
 			Index:  index,     //序号
 			Text:   blockText, //内容
@@ -319,6 +320,32 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock, isSite
 	return returnBlocks, returnValue
 }
 
+func mergetext(titles []string, text string) string {
+	if len(titles) == 0 || utf8.RuneCountInString(text) > 150 {
+		return text
+	}
+	splitLenstrs := strings.Split(text, "\n")
+	if len(splitLenstrs) == 1 || len(titles) != len(splitLenstrs)-1 {
+		return text
+	}
+	tt := ""
+	for i, v := range splitLenstrs[1:] {
+		lentexts := regDivision.Split(v, -1)
+		if len(lentexts) == 2 {
+			if strings.Contains(titles[i], lentexts[0]) {
+				tt += titles[i] + ":" + lentexts[1] + "\n"
+			}else if strings.Contains(titles[i], lentexts[0]) ||strings.Contains(titles[i], lentexts[0]){
+				tt += titles[i] + ":" + lentexts[1] + "\n"
+			}
+		}
+	}
+	if len(tt) == 0 {
+		return text
+	} else {
+		return tt
+	}
+}
+
 //块标题处理
 func ProcTitle(title string) []string {
 	if title == "" {
@@ -345,6 +372,14 @@ func ProcTitle(title string) []string {
 				}
 				ara[kk] = start + vv
 			}
+		} else if vv == "联系人" || vv == "联系方式" {
+			if strings.Contains(prev, "代理") {
+				ara[kk] = "代理机构" + vv
+			} else if strings.Contains(prev, "中标") {
+				ara[kk] = "中标单位" + vv
+			} else if strings.Contains(prev, "采购") {
+				ara[kk] = "采购单位" + vv
+			}
 		}
 		if len([]rune(vv)) > 3 {
 			if direct == -1 {

+ 20 - 9
src/res/fieldscore.json

@@ -100,8 +100,13 @@
             },
             {
                 "describe": "乱码",
-                "regstr": "[±??¨êí¤ì×üàóμˉ÷°úéè]",
-                "score": -20
+                "regstr": "[±??¨êí¤ìüàóμˉ÷°úéè]",
+                "score": -10
+            },
+            {
+                "describe": "符号",
+                "regstr": "[,,.。!!]",
+                "score": -10
             }
         ],
         "length": [
@@ -160,7 +165,7 @@
         "positivewords": [
             {
                 "describe": "以*结尾",
-                "regstr": ".{2,100}(总站|委员会|管委会|联合会|联合体|医院|卫计委|机关|社区|中心|中心校|分校|办公室|学校|幼儿园|动物园|管理站|馆|基地|青年宫|少年宫|艺术宫|电视台|协会|政府|[初高]中|集团|银行|[大中小]学|院|厂|店|段|场|社|室|部|厅|局|处|所|队|公司|监狱|监测站|血站|检查站|工作站|供应站)$",
+                "regstr": ".{2,100}(总站|委员会|管委会|联合会|联合体|医院|卫计委|机关|社区|中心|中心校|分校|办公室|学校|幼儿园|动物园|管理站|馆|基地|青年宫|少年宫|艺术宫|电视台|协会|政府|[初高]中|集团|银行|[大中小]学|院|厂|店|段|场|社|室|部|厅|局|处|所|队|公司|监狱|监测站|血站|检查站|工作站|供应站|分行)$",
                 "score": 10
             }
         ],
@@ -172,8 +177,8 @@
             },
             {
                 "describe": "包含负分",
-                "regstr": "(代表|招标|交易中心|顾问|单位|测试)",
-                "score": -5
+                "regstr": "(代表|招标|交易中心|顾问|单位|测试|采购)",
+                "score": -10
             },
             {
                 "describe": "包含特殊符号2",
@@ -484,7 +489,13 @@
     },
  	"buyertel": {
         "type": "string",
-        "positivewords": [],
+        "positivewords": [
+            {
+                "describe": "区号开头",
+                "regstr": "^\\([0-9]{3,4}\\)",
+                "score": 2
+            }
+        ],
         "negativewords": [
             {
                 "describe": "出现中文汉字",
@@ -589,8 +600,8 @@
                 "score": -10
             },  {
                 "describe": "包含负分",
-                "regstr": "(详(见|情)|公告|test)",
-                "score": -10
+                "regstr": "(详(见|情)|公告|test|招标人)",
+                "score": -20
             }
         ],
         "length": [
@@ -650,7 +661,7 @@
             {
                 "describe": "[gt,∞,score]",
                 "range": [
-                    14,
+                    24,
                     -1,
                     -10
                 ]

+ 5 - 10
src/res/formattext.json

@@ -93,7 +93,7 @@
 		},
 		{
 			"reg": "\\n(.{2,8})联系方式[::](.+?)\\s+\\+\\s+(.+)",
-            "separator": "\n${1}联系人:$2\n${1}联系电话:$3",
+            "separator": "\n${1}联系人:$2\n${1}联系方式:$3",
             "desc": ""
 		},
 		{
@@ -109,21 +109,16 @@
 		{
 			"reg": "(收货)(联系人)和(联系方式)[::](.+?)/(.+)",
             "separator": "${1}${2}:${4}\n${1}${3}:${5}",
-            "desc": ""
-		},
-		{
-			"reg": "(招标人[::][^::,、,]+?)(联系人[::].+)",
-            "separator": "${1}\n${2}",
             "desc": ""
 		},
 		{
 			"reg": "(?s)([^((,,。、.;;::\\s\u3000\u2003\u00a0]{2,8})联系人[::]([\u4e00-\u9fa5、]+)\\s+((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,4})?|\\d{3,4}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{4})?|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)",
-            "separator": "${1}联系人:${2}\n${1}联系电话:${3}",
+            "separator": "${1}联系人:${2}\n${1}联系方式:${3}",
             "desc": ""
 		},
 		{
 			"reg": "(采购[^方式]{1,8})[::]([^::]{3,15})[,,]([^::]{2,5})((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,4})?|\\d{3,4}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{4})?|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)",
-            "separator": "${1}:${2}\n${1}联系人:${3}\n${1}联系电话:${4}",
+            "separator": "${1}:${2}\n${1}联系人:${3}\n${1}联系方式:${4}",
             "desc": ""
 		},
 		{
@@ -143,8 +138,8 @@
 		},
 		{
 			"reg": "(?s)([^((,,。、.;;::\\s\u3000\u2003\u00a0]{0,8}?)(联系(方式|电话|人)和?)+[::]([^\\d::]{2,8}?)[((]?[\\s\u3000\u2003\u00a0]*((([((]\\d{3,4}[))])?(\\d{6,12}([×―—-\\-]+\\d{3,4})?|\\d{3,4}[×―—-\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—-\\-]+\\d{4})?|(\\d{2}[×―—-\\-])+\\d{8}[×―—-\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,,;;\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)",
-            "separator": "${1}联系人:${4}\n${1}联系电话:${5}",
-            "desc": "采购人联系方式:李静  0311-66629799 or 联系电话:张先生 0917―2660282"
+            "separator": "${1}联系人:${4}\n${1}联系方式:${5}",
+            "desc": "采购人联系方式:李静  0311-66629799 or 联系方式:张先生 0917―2660282"
 		},
 		{
 			"reg": "[((]([^))]{2,8}联系人)[::](.+?)[,,]((联系)?(电话|手机)(号码)?)[::](.+)[))]",

+ 0 - 1
src/res/specialsymbols.json

@@ -108,7 +108,6 @@
             "buyer": true,
             "winner": true,
             "agency": true,
-            "agency": true,
             "buyertel": true,
             "buyerperson": true
         },

+ 12 - 0
udpcreateindex/src/biddingall.go

@@ -30,6 +30,15 @@ func biddingAllTask(data []byte, mapInfo map[string]interface{}) {
 				"$lte": qutil.StringTOBsonId(mapInfo["lteid"].(string)),
 			},
 		}
+	} else {
+		idMap := q["_id"].(map[string]interface{})
+		tmpQ := map[string]interface{}{}
+		for c, id := range idMap {
+			if idStr, ok := id.(string); ok && id != "" {
+				tmpQ[c] = qutil.StringTOBsonId(idStr)
+			}
+		}
+		q["_id"] = tmpQ
 	}
 	//bidding库
 	session := mgo.GetMgoConn()
@@ -284,6 +293,9 @@ func biddingAllTask(data []byte, mapInfo map[string]interface{}) {
 						} else {
 							if v == "detail" {
 								detail, _ := tmp[v].(string)
+								if len([]rune(detail)) > detailLength {
+									detail = detail[:detailLength]
+								}
 								newTmp[v] = FilterDetail(detail)
 							} else {
 								newTmp[v] = tmp[v]

+ 3 - 0
udpcreateindex/src/biddingindex.go

@@ -349,6 +349,9 @@ func doIndex(infos []map[string]interface{}, eMap map[string]map[string]interfac
 					} else {
 						if v == "detail" {
 							detail, _ := tmp[v].(string)
+							if len([]rune(detail)) > detailLength {
+								detail = detail[:detailLength]
+							}
 							newTmp[v] = FilterDetail(detail)
 						} else {
 							newTmp[v] = tmp[v]

+ 1 - 1
udpcreateindex/src/bidingpurchasing.go

@@ -350,7 +350,7 @@ func getFileText(tmp map[string]interface{}) (filetext string) {
 					if resultMap, ok := result.(map[string]interface{}); resultMap != nil && ok {
 						if attach_url := util.ObjToString(resultMap["attach_url"]); attach_url != "" {
 							bs := u.OssGetObject(attach_url) //oss读数据
-							if utf8.RuneCountInString(filetext+bs) < util.IntAllDef(Sysconfig["filelength"], 100000) {
+							if utf8.RuneCountInString(filetext+bs) < fileLength {
 								filetext += bs + "\n"
 							} else {
 								break

+ 2 - 1
udpcreateindex/src/config.json

@@ -43,7 +43,8 @@
         "purchasinglist":"itemname,model,unitname,number",
         "multiIndex": ""
     },
-    "filelength": 100000,
+    "filelength": 50000,
+    "detaillength": 50000,
     "project": {
         "db": "mxs",
         "collect": "test",

+ 4 - 0
udpcreateindex/src/main.go

@@ -28,6 +28,8 @@ var (
 	multiIndex           []string
 	purchasinglistFields []string
 	BulkSize             = 400
+	detailLength         = 50000
+	fileLength           = 50000
 	//bidding_other连接信息
 	bidding_other_es *elastic.Elastic
 	other_index      string
@@ -40,6 +42,8 @@ func init() {
 	util.ReadConfig(&Sysconfig)
 	inits()
 	go checkMapJob()
+	detailLength = util.IntAllDef(Sysconfig["detaillength"], 50000)
+	fileLength = util.IntAllDef(Sysconfig["filelength"], 50000)
 	updport, _ = Sysconfig["updport"].(string)
 	winner, _ = Sysconfig["winner"].(map[string]interface{})
 	standard, _ = Sysconfig["standard"].(map[string]interface{})