|
@@ -103,7 +103,6 @@ func StartExtractTaskId(taskId string) bool {
|
|
|
ext.InitCityAll()
|
|
|
ext.InitCitySim()
|
|
|
|
|
|
- return true
|
|
|
ext.IsRun = true
|
|
|
if isgo {
|
|
|
go RunExtractTask(taskId)
|
|
@@ -248,7 +247,7 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
|
|
|
|
|
|
//fmt.Println("-----------", j.Province, j.City, j.BuyerAddr, j.Title) //j.Address
|
|
|
//ExtractPC(j.Result, j.Province, j.City, j.Title, j.BuyerAddr, j.SourceMid) //j.Address
|
|
|
-
|
|
|
+ ExtractPC2(j.Result, "Province", "City", "Title", "Addr", j.SourceMid)
|
|
|
//分析抽取结果并保存 todo
|
|
|
AnalysisSaveResult(j.Data, j.Result, e.TaskInfo)
|
|
|
|
|
@@ -772,22 +771,51 @@ func AnalysisSaveResult(doc *map[string]interface{}, result map[string][]*ju.Ext
|
|
|
|
|
|
//抽取城市、省份
|
|
|
func ExtractPC2(result map[string][]*ju.ExtField, province, city, title, addr, sourcemid string) (bres bool, c, p string) {
|
|
|
- pjnarr := make([]string, 0) //获取所有项目名称结果集
|
|
|
- buyerarr := make([]string, 0) //获取所有采购单位结果集
|
|
|
- for key, val := range result {
|
|
|
- if key == "projectname" {
|
|
|
- for _, v := range val {
|
|
|
- p := fmt.Sprint(v.Value)
|
|
|
- pjnarr = append(pjnarr, p)
|
|
|
+ var pjnarr, buyerarr []string
|
|
|
+ var pb []interface{}
|
|
|
+ for n, val := range result["projectname"] {
|
|
|
+ pjnarr[n] = fmt.Sprint(val.Value)
|
|
|
+ }
|
|
|
+ for n, val := range result["buyer"] {
|
|
|
+ buyerarr[n] = fmt.Sprint(val.Value)
|
|
|
+ }
|
|
|
+ pl := len(pjnarr)
|
|
|
+ bl := len(buyerarr)
|
|
|
+ max := 0
|
|
|
+ if pl > bl {
|
|
|
+ max = pl
|
|
|
+ } else {
|
|
|
+ max = bl
|
|
|
+ }
|
|
|
+ //city, buyer, addr, projectname, title
|
|
|
+ if max == 0 { //没有projectname和buyer结果集
|
|
|
+ tmp1 := []string{city, "", addr, "", title}
|
|
|
+ pb = append(pb, tmp1)
|
|
|
+ } else { //至少有一个结果集
|
|
|
+ if max == pl {
|
|
|
+ for i := 0; i < max; i++ {
|
|
|
+ p := pjnarr[i]
|
|
|
+ b := ""
|
|
|
+ if i < bl {
|
|
|
+ b = buyerarr[i]
|
|
|
+ }
|
|
|
+ tmp2 := []string{city, b, addr, p, title}
|
|
|
+ pb = append(pb, tmp2)
|
|
|
}
|
|
|
- }
|
|
|
- if key == "buyer" {
|
|
|
- for _, v := range val {
|
|
|
- b := fmt.Sprint(v.Value)
|
|
|
- buyerarr = append(buyerarr, b)
|
|
|
+ } else {
|
|
|
+ for i := 0; i < max; i++ {
|
|
|
+ b := buyerarr[i]
|
|
|
+ p := ""
|
|
|
+ if i < pl {
|
|
|
+ p = pjnarr[i]
|
|
|
+ }
|
|
|
+ tmp3 := []string{city, b, addr, p, title}
|
|
|
+ pb = append(pb, tmp3)
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
}
|
|
|
+ log.Println(pb)
|
|
|
return
|
|
|
}
|
|
|
func ExtractPC(buyer, projectname, title, city, province, addr string, id interface{}) (bres bool, c, p string) {
|
|
@@ -809,18 +837,18 @@ func ExtractPC(buyer, projectname, title, city, province, addr string, id interf
|
|
|
bp := false
|
|
|
if ProvinceBrief[province] != nil {
|
|
|
bp = true
|
|
|
- } else { //先识别省份
|
|
|
+ } else { //没有省份,先识别省份
|
|
|
for _, str := range []string{city, buyer, addr, projectname, title} {
|
|
|
- word := AreaProvinceGet.CheckSensitiveWord(str)
|
|
|
+ word := AreaProvinceGet.CheckSensitiveWord(str) //省全称
|
|
|
if word != "" {
|
|
|
- province = ProvinceMap[word]
|
|
|
+ province = ProvinceMap[word] //省简称
|
|
|
bp = true
|
|
|
break
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
//匹配城市
|
|
|
- if bc { //简称不存在CityBrief[city]==nil 或省份不对
|
|
|
+ if bc { //城市简称不存在CityBrief[city]==nil,或城市简称存在但省份不对,继续抽取
|
|
|
//目前是全匹配模式,如果再加上精简匹配,加一层循环
|
|
|
for pos, GET := range []DFA{AreaGet, AreaSimGet} {
|
|
|
ws := make([]string, 5)
|