Răsfoiți Sursa

品牌抽取

unknown 6 ani în urmă
părinte
commit
4419511532

+ 13 - 9
src/jy/extract/extract.go

@@ -78,7 +78,6 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
 			}
 			//log.Println(v["_id"])
 			j := PreInfo(v)
-			//fmt.Println(j.HasTable, j.HasGoods, j.HasBrand, j.HasKey, "j-------", j.BrandData)
 			ext.TaskInfo.ProcessPool <- true
 			go ext.ExtractProcess(j)
 		}
@@ -300,13 +299,7 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
 				if clear.AsyField[key] != nil || clear.SymField[key] != nil ||
 					clear.MesField[key] != nil {
 					text := qu.ObjToString(v.Value)
-					if key == "projectname" {
-						fmt.Println("1===========", text)
-					}
 					text = clear.OtherClean(key, text)
-					if key == "projectname" {
-						fmt.Println("2===========", text)
-					}
 					v.Value = text
 				}
 			}
@@ -777,7 +770,6 @@ type FieldValue struct {
 
 //分析抽取结果并保存
 func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
-	log.Println("============", j.HasBrand, j.HasGoods, j.HasKey, j.HasTable, j.BrandData)
 	doc := j.Data
 	result := j.Result
 	_id := qu.BsonIdToSId((*doc)["_id"])
@@ -863,7 +855,19 @@ func AnalysisSaveResult(j *ju.Job, e *ExtractTask) {
 			tmp["area"] = p
 		}
 	}
-
+	//品牌抽取
+	if ju.IsBrandGoods {
+		tmp["checkhas"] = map[string]int{
+			"hastable": j.HasTable,
+			"hasgoods": j.HasGoods,
+			"hasbrand": j.HasBrand,
+			"haskey":   j.HasKey,
+		}
+		if len(j.BrandData) > 0 {
+			tmp["brand"] = j.BrandData
+		}
+		//log.Println("============", j.HasBrand, j.HasGoods, j.HasKey, j.HasTable, j.BrandData)
+	}
 	if e.TaskInfo.TestColl == "" {
 		if len(tmp) > 0 { //保存抽取结果
 			tmparr := []map[string]interface{}{

+ 2 - 3
src/jy/pretreated/analytable.go

@@ -2664,7 +2664,6 @@ func (table *Table) analyBrand() {
 	brandRule := u.BrandRules
 	//将val为数组和string的分开
 	for key, val := range table.SortKV.Map {
-		//fmt.Println("key:", key, "	val:", val)
 		key = regReplAllSpace.ReplaceAllString(key, "")
 		key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
 		kind := reflect.TypeOf(val).String()
@@ -2779,7 +2778,7 @@ func (table *Table) analyBrand() {
 				}
 				finishKa[vf2K] = vf2
 			}
-			hasKey(table, arrcount) //是否匹配到两个以上的key
+			hasKey(table, arrcount) //是否匹配到table中的标题
 			if arrcount >= 1 {
 				finishData := dealArrData(maxNum, finishKa)
 				table.BrandData = append(table.BrandData, finishData)
@@ -2807,7 +2806,7 @@ func (table *Table) analyBrand() {
 				}
 			}
 			//原始字符串数据处理
-			hasKey(table, strcount) //是否匹配到两个以上的key
+			hasKey(table, strcount) //是否匹配到table中的标题
 			if strcount >= 1 {
 				finishData := dealStrData(endStrMap) //处理数据
 				if len(finishData) > 0 {

+ 0 - 2
src/jy/pretreated/tablev2.go

@@ -31,7 +31,6 @@ type TableResult struct {
 	WinnerOrder    []map[string]interface{}
 	BrandData      [][]map[string]string
 	HasKey         int //有key
-	HasVal         int //有val
 	HasBrand       int //有品牌
 	HasGoods       int //有商品
 }
@@ -496,7 +495,6 @@ type Table struct {
 	BHeader                bool //拆分表是否有表头
 	BrandData              [][]map[string]string
 	HasKey                 int //有key
-	HasVal                 int //有val
 	HasBrand               int //有品牌
 	HasGoods               int //有商品
 }

+ 1 - 2
src/jy/util/article.go

@@ -21,8 +21,7 @@ type Job struct {
 
 	BrandData [][]map[string]string //
 	HasTable  int                   //有table
-	HasKey    int                   //有key
-	HasVal    int                   //有val
+	HasKey    int                   //是否匹配到table中的标题
 	HasBrand  int                   //有品牌
 	HasGoods  int                   //有商品
 }

+ 1 - 0
src/main.go

@@ -20,6 +20,7 @@ func init() {
 	qu.ReadConfig("./res/brandrule.json", &util.BrandRules)
 	qu.ReadConfig("./res/goods.json", &util.GoodsConfig)
 	qu.ReadConfig("./res/brand.json", &util.BrandConfig)
+	//初始化品牌和商品
 	util.InitBrand()
 	util.InitGoods()
 	//初始化mongo连接

+ 1 - 1
src/main_test.go

@@ -14,7 +14,7 @@ import (
 func Test_task(t *testing.T) {
 	Mgo = MgoFactory(1, 3, 120, "192.168.3.207:27082", "extract_kf")
 	//extract.StartExtractTaskId("5b8f804025e29a290415aee1")
-	extract.StartExtractTestTask("5b8f804025e29a290415aee1", "5caafa83a5cb26b9b7ec03b7", "1", "mxs_v3", "mxs_v3")
+	extract.StartExtractTestTask("5b8f804025e29a290415aee1", "5caafa83a5cb26b9b7ec03b7", "5", "mxs_v5", "mxs_v3")
 	//extract.StartExtractTestTask("5c3d75c96984142998eb00e1", "5c2a3d28a5cb26b9b76144dd", "100", "mxs_v3", "mxs_v3")
 	time.Sleep(5 * time.Second)
 }

+ 1 - 1
src/res/brandrule.json

@@ -1,6 +1,6 @@
 {
 	"must":{
-		"itemname":"((^(货物|品目|产品|商品|物资|印刷品|物料|材料|(分项)项目|设备|成交标(的)?)(名称|种类|类别|名|描述|内容|服务)+|服务产品|采购(目录|设备)|^(品名|品目|项目)$)和?)+",
+		"itemname":"((^(货物|品目|产品|商品|物资|印刷品|物料|材料|(分项)项目|设备|成交标(的)?)(名称|种类|名|描述|内容|服务)+|服务产品|采购(目录|设备)|^(品名|品目|项目)$)和?)+",
 		"brandname":"品牌(名称)?|^厂家",
 		"modal":"^(规格)?(型号|参数)|规格$|^(服务内容|采购需求)", 
 		"unitprice":"单价|^价格|(预算|采购预算)(金额)?$|(单个商品|包件)最高限价|(中标成交|单次服务|控制)+金额|^金额$"