Browse Source

Merge branch 'master' of http://192.168.3.207:10080/qmx/jy-data-extract

zhangjinkun 6 years ago
parent
commit
64d39ad53e
1 changed file with 49 additions and 21 deletions
  1. 49 21
      src/jy/extract/extract.go

+ 49 - 21
src/jy/extract/extract.go

@@ -100,11 +100,10 @@ func StartExtractTaskId(taskId string) bool {
 	ext.InitTag()
 	ext.InitClearFn()
 
-	ext.InitProvince()
-	ext.InitCityAll()
-	ext.InitCitySim()
+	//	ext.InitProvince()
+	//	ext.InitCityAll()
+	//	ext.InitCitySim()
 
-	return true
 	ext.IsRun = true
 	if isgo {
 		go RunExtractTask(taskId)
@@ -255,7 +254,7 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
 
 		//fmt.Println("-----------", j.Province, j.City, j.BuyerAddr, j.Title) //j.Address
 		//ExtractPC(j.Result, j.Province, j.City, j.Title, j.BuyerAddr, j.SourceMid) //j.Address
-
+		ExtractPC2(j.Result, "Province", "City", "Title", "Addr", j.SourceMid)
 		//分析抽取结果并保存 todo
 		AnalysisSaveResult(j.Data, j.Result, e.TaskInfo)
 
@@ -794,22 +793,51 @@ func AnalysisSaveResult(doc *map[string]interface{}, result map[string][]*ju.Ext
 
 //抽取城市、省份
 func ExtractPC2(result map[string][]*ju.ExtField, province, city, title, addr, sourcemid string) (bres bool, c, p string) {
-	pjnarr := make([]string, 0)   //获取所有项目名称结果集
-	buyerarr := make([]string, 0) //获取所有采购单位结果集
-	for key, val := range result {
-		if key == "projectname" {
-			for _, v := range val {
-				p := fmt.Sprint(v.Value)
-				pjnarr = append(pjnarr, p)
+	var pjnarr, buyerarr []string
+	var pb []interface{}
+	for n, val := range result["projectname"] {
+		pjnarr[n] = fmt.Sprint(val.Value)
+	}
+	for n, val := range result["buyer"] {
+		buyerarr[n] = fmt.Sprint(val.Value)
+	}
+	pl := len(pjnarr)
+	bl := len(buyerarr)
+	max := 0
+	if pl > bl {
+		max = pl
+	} else {
+		max = bl
+	}
+	//city, buyer, addr, projectname, title
+	if max == 0 { //没有projectname和buyer结果集
+		tmp1 := []string{city, "", addr, "", title}
+		pb = append(pb, tmp1)
+	} else { //至少有一个结果集
+		if max == pl {
+			for i := 0; i < max; i++ {
+				p := pjnarr[i]
+				b := ""
+				if i < bl {
+					b = buyerarr[i]
+				}
+				tmp2 := []string{city, b, addr, p, title}
+				pb = append(pb, tmp2)
 			}
-		}
-		if key == "buyer" {
-			for _, v := range val {
-				b := fmt.Sprint(v.Value)
-				buyerarr = append(buyerarr, b)
+		} else {
+			for i := 0; i < max; i++ {
+				b := buyerarr[i]
+				p := ""
+				if i < pl {
+					p = pjnarr[i]
+				}
+				tmp3 := []string{city, b, addr, p, title}
+				pb = append(pb, tmp3)
 			}
 		}
+
 	}
+	log.Println(pb)
 	return
 }
 func ExtractPC(buyer, projectname, title, city, province, addr string, id interface{}) (bres bool, c, p string) {
@@ -831,18 +859,18 @@ func ExtractPC(buyer, projectname, title, city, province, addr string, id interf
 	bp := false
 	if ProvinceBrief[province] != nil {
 		bp = true
-	} else { //先识别省份
+	} else { //没有省份,先识别省份
 		for _, str := range []string{city, buyer, addr, projectname, title} {
-			word := AreaProvinceGet.CheckSensitiveWord(str)
+			word := AreaProvinceGet.CheckSensitiveWord(str) //省全称
 			if word != "" {
-				province = ProvinceMap[word]
+				province = ProvinceMap[word] //省简称
 				bp = true
 				break
 			}
 		}
 	}
 	//匹配城市
-	if bc { //简称不存在CityBrief[city]==nil 或省份不对
+	if bc { //城市简称不存在CityBrief[city]==nil,或城市简称存在但省份不对,继续抽取
 		//目前是全匹配模式,如果再加上精简匹配,加一层循环
 		for pos, GET := range []DFA{AreaGet, AreaSimGet} {
 			ws := make([]string, 5)