zhangjinkun 5 년 전
부모
커밋
fd1eccd857
5개의 변경된 파일, 52개의 추가 그리고 37개의 삭제
  1. 3 2
      src/config.json
  2. 5 5
      src/jy/extract/extract.go
  3. 29 25
      src/jy/extract/extractInit.go
  4. 9 1
      src/jy/util/util.go
  5. 6 4
      src/main.go

+ 3 - 2
src/config.json

@@ -2,7 +2,7 @@
     "port": "9090",
     "mgodb": "192.168.3.207:27092",
     "dbsize": 10,
-    "dbname": "extract_kf",
+    "dbname": "extract_dev32",
     "redis": "buyer=192.168.3.207:1679,winner=192.168.3.207:1679,agency=192.168.3.207:1679",
     "elasticsearch": "http://127.0.0.1:9200",
     "elasticsearch_index": "extract_kf",
@@ -10,7 +10,8 @@
     "elasticPoolSize": 30,
     "mergetable": "projectset",
     "mergetablealias": "projectset_v1",
-    "saveresult": true,
+    "saveresult": false,
+    "fieldsfind":false,
     "qualityaudit": false,
     "saveblock": false,
     "filelength": 100000,

+ 5 - 5
src/jy/extract/extract.go

@@ -262,7 +262,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
 	if isextFile {
 		file2text(&doc) //附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
 	}
-	//正文小于50个字,有附件把附件内容加到正文
+	//正文小于200个字,有附件把附件内容加到正文
 	tmpDeatil := detail
 	tmpdocument, err := goquery.NewDocumentFromReader(strings.NewReader(tmpDeatil))
 	if err == nil {
@@ -1554,7 +1554,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		//重新取出清理过后的中标候选人
 		resetWinnerorder(j)
 		doc, result, _id := funcAnalysis(j, e)
-		if isSaveTag, _ := ju.Config["isSaveTag"].(bool); isSaveTag {
+		if ju.IsSaveTag {
 			go otherNeedSave(j, result, e)
 		}
 		auxinfo := auxInfo(j)
@@ -1614,7 +1614,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 			}
 		}
 		//质量审核
-		if ok, _ := ju.Config["qualityaudit"].(bool); ok {
+		if ju.QualityAudit {
 			e.QualityAudit(tmp)
 		}
 		if e.IsExtractCity { //城市抽取
@@ -1646,7 +1646,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 		blocks := make([]ju.BlockAndTag, 0)
 		for _, v := range j.Block {
 			//分包和标签
-			if ju.Config["saveblock"].(bool) {
+			if ju.SaveBlock {
 				xx, _ := json.Marshal(v)
 				tmpblock := new(ju.TmpBlock)
 				err := json.Unmarshal(xx, &tmpblock)
@@ -1712,7 +1712,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
 				e.BidTotal++
 				e.RWMutex.Unlock()
 			}
-			if b, ok := ju.Config["saveresult"].(bool); ok && b {
+			if ju.SaveResult {
 				id := tmp["_id"]
 				tmp["result"] = result
 				tmp["resultf"] = resultf

+ 29 - 25
src/jy/extract/extractInit.go

@@ -80,7 +80,7 @@ type ExtractTask struct {
 	SiteClearFn   map[string][]string               //站点清理函数
 	IsExtractCity bool                              //是否开启城市抽取
 	Fields        map[string]int                    //抽取属性组
-	SiteFields        map[string]int                    //抽取站点属性组
+	SiteFields    map[string]int                    //抽取站点属性组
 
 	IsFileField bool      //是否开启附件抽取
 	FileFields  *sync.Map //抽取附件属性组
@@ -516,9 +516,9 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
 					continue
 				}
 				field := qu.ObjToString(v["s_field"])
-				if isSite{
+				if isSite {
 					e.SiteFields[field] = 1
-				}else {
+				} else {
 					e.Fields[field] = 1 //加入抽取属性组备用
 				}
 				rinfo := &RegLuaInfo{
@@ -573,9 +573,9 @@ func (e *ExtractTask) InitRuleCore(isSite bool) {
 					continue
 				}
 				field := qu.ObjToString(v["s_field"])
-				if isSite{
+				if isSite {
 					e.SiteFields[field] = 1
-				}else {
+				} else {
 					e.Fields[field] = 1 //加入抽取属性组备用
 				}
 				rinfo := &RegLuaInfo{
@@ -1520,33 +1520,37 @@ func getFieldAllAndBlocks(a [][]map[string]interface{}) (arr [][]map[string]inte
 	for _, v := range a {
 		_id, _ := v[0]["_id"]
 		if tmp, ok := v[1]["$set"].(map[string]interface{}); ok {
-			if tmp["blocks"] != nil {
-				block := map[string]interface{}{
-					"_id":    _id,
-					"blocks": tmp["blocks"],
+			if ju.SaveBlock {
+				if tmp["blocks"] != nil {
+					block := map[string]interface{}{
+						"_id":    _id,
+						"blocks": tmp["blocks"],
+					}
+					blocks = append(blocks, block)
 				}
-				blocks = append(blocks, block)
 			}
 			delete(tmp, "blocks")
-			if f, ok := tmp["fieldall"].(map[string][]map[string]interface{}); ok {
-				fieldall := map[string]interface{}{
-					"_id": _id,
+			if ju.FieldsFind {
+				if f, ok := tmp["fieldall"].(map[string][]map[string]interface{}); ok {
+					fieldall := map[string]interface{}{
+						"_id": _id,
+					}
+					for k, v := range f {
+						fieldall[k] = v
+					}
+					fieldalls = append(fieldalls, fieldall)
 				}
-				for k, v := range f {
-					fieldall[k] = v
+				if ff, ok := tmp["fieldallf"].(map[string][]map[string]interface{}); ok {
+					fieldallf := map[string]interface{}{
+						"_id": _id,
+					}
+					for k, v := range ff {
+						fieldallf[k] = v
+					}
+					fieldallsf = append(fieldalls, fieldallf)
 				}
-				fieldalls = append(fieldalls, fieldall)
 			}
 			delete(tmp, "fieldall")
-			if ff, ok := tmp["fieldallf"].(map[string][]map[string]interface{}); ok {
-				fieldallf := map[string]interface{}{
-					"_id": _id,
-				}
-				for k, v := range ff {
-					fieldallf[k] = v
-				}
-				fieldallsf = append(fieldalls, fieldallf)
-			}
 			delete(tmp, "fieldallf")
 			v[1] = tmp
 		}

+ 9 - 1
src/jy/util/util.go

@@ -32,15 +32,23 @@ var GoodsGet *DFA     //商品
 var BrandGet *DFA     //品牌
 var IsBrandGoods bool //是否开启品牌抽取
 
+var SaveResult, FieldsFind, IsSaveTag, SaveBlock, QualityAudit bool
+
 func init() {
 	syncint = make(chan bool, 1)
 }
 
-func InitMgoPool() {
+func UtilInit() {
 	initCap := qu.IntAll(Config["dbsize"])
 	addr := qu.ObjToString(Config["mgodb"])
 	dbname := qu.ObjToString(Config["dbname"])
 	Mgo = MgoFactory(initCap, initCap*3, 120, addr, dbname)
+
+	SaveResult, _ = Config["saveresult"].(bool)
+	FieldsFind, _ = Config["fieldsfind"].(bool)
+	IsSaveTag, _ = Config["iscltlog"].(bool)
+	SaveBlock, _ = Config["saveblock"].(bool)
+	QualityAudit, _ = Config["qualityaudit"].(bool)
 }
 
 func GetSyncIndex(code string) string {

+ 6 - 4
src/main.go

@@ -1,7 +1,6 @@
 package main
 
 import (
-	log "github.com/donnie4w/go-logger/logger"
 	_ "jy/admin"
 	_ "jy/admin/audit"
 	_ "jy/admin/distribution"
@@ -13,9 +12,12 @@ import (
 	"net/http"
 	_ "net/http/pprof"
 	qu "qfw/util"
+
+	log "github.com/donnie4w/go-logger/logger"
 	//"qfw/util/elastic"
-	"gopkg.in/olivere/elastic.v5"
 	"qfw/util/redis"
+
+	"gopkg.in/olivere/elastic.v5"
 )
 
 func init() {
@@ -29,8 +31,8 @@ func init() {
 	//初始化品牌和商品
 	util.InitBrand()
 	util.InitGoods()
-	//初始化mongo连接
-	util.InitMgoPool()
+	//初始化util
+	util.UtilInit()
 	//初始化redis
 	redis.InitRedisBySize(qu.ObjToString(util.Config["redis"]), 50, 30, 240)
 	//初始化elastic连接