// extpackage package extract import ( "jy/clear" ju "jy/util" "log" qu "qfw/util" "reflect" "regexp" "sort" ) func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask, isSite bool, codeSite string) { if pkg.ColonKV != nil { kvparse(pkg.ColonKV, e, sonJobResult, isSite, codeSite) } if pkg.TableKV != nil { kvparse(pkg.TableKV, e, sonJobResult, isSite, codeSite) } if pkg.SpaceKV != nil { kvparse(pkg.SpaceKV, e, sonJobResult, isSite, codeSite) } } func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{}, isSite bool, codeSite string) { if p != nil { for pk, pv2 := range p.KvTags { if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额") { tmp := []*ju.Tag{} var tmpindex, tmpweight int = -9999, -9999 for ii, vv := range pv2 { if pk == "中标单位" && regexp.MustCompile("[0-9.元人¥$]").MatchString(vv.Value){ continue } if tmpweight < vv.Weight { tmpindex = ii tmpweight = vv.Weight } } tmp = append(tmp, pv2[tmpindex]) p.KvTags[pk] = tmp } } for pk, pv := range p.KvTags { if len(pv) == 0 { continue } tags := ju.GetTags(pk, isSite, codeSite) if tags.Len() > 0 { if ((*sonJobResult)["name"] == nil || (*sonJobResult)["name"] == "") && tags[0].Key == "项目名称" { (*sonJobResult)["name"] = pv[0].Value } if qu.Float64All((*sonJobResult)["budget"]) == 0 && tags[0].Key == "预算" { lock.Lock() cfn := e.ClearFn["budget"] lock.Unlock() data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""}) (*sonJobResult)["budget"] = data[0] continue } if qu.Float64All((*sonJobResult)["bidamount"]) == 0 && tags[0].Key == "中标金额" { lock.Lock() cfn := e.ClearFn["budget"] lock.Unlock() data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""}) (*sonJobResult)["bidamount"] = data[0] continue } if ((*sonJobResult)["winner"] == nil || (*sonJobResult)["winner"] == "") && tags[0].Key == "中标单位" { (*sonJobResult)["winner"] = pv[0].Value } } if (*sonJobResult)["name"] == nil && pk == "名称" { (*sonJobResult)["name"] = pv[0].Value } } } } //处理分包信息 func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) { qu.Try(func() { if len(j.BlockPackage) > 0 { for _, ev := range e.PkgRuleCores { for _, eve := range ev.RuleCores { if !eve.IsLua { ExtRuleCoreByPkgReg(j, eve, e) // 分包正则抽取 预算 中标单位 中标价 成交状态 } } for _, evb := range ev.RuleBacks { if !evb.IsLua { ExtRegBackPkg(j, evb) // 分包正则清理 中标单位 成交状态 内容 名称 } } } tmpkeys := []string{} for k, _ := range j.BlockPackage { if k == "" { continue } tmpkeys = append(tmpkeys, k) } sort.Strings(tmpkeys) packageResult := map[string]map[string]interface{}{} //packagenum := len(j.BlockPackage) for _, pkName := range tmpkeys { pkg, ok := j.BlockPackage[pkName] if !ok { continue } //是否清理标记 clearmap := map[string]bool{} sonJobResult := map[string]interface{}{} if pkg != nil { sonJobResult["origin"] = pkg.Origin sonJobResult["text"] = pkg.Text sonJobResult["budget"] = pkg.Budget sonJobResult["bidamount"] = pkg.Bidamount if pkg.Winner == "" && len(j.Winnerorder) > 0 { if sonJobResult["winnerorder"] == nil { sonJobResult["winnerorder"] = j.Winnerorder if sonJobResult["bidamount"].(float64) <= 0 { sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"]) } if sonJobResult["winner"] == "" { sonJobResult["winner"] = j.Winnerorder[0]["entname"] } } } else { if len(j.Winnerorder) > 0 { sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"]) sonJobResult["winner"] = j.Winnerorder[0]["entname"] } sonJobResult["winnerorder"] = pkg.WinnerOrder } pkvdata(pkg, &sonJobResult, e, isSite, codeSite) sonJobResult["type"] = pkg.Type if len(tmpkeys) == 1 { if qu.Float64All(sonJobResult["budget"]) == 0 { for _, bv := range j.Block { kvparse(bv.ColonKV, e, &sonJobResult, isSite, codeSite) kvparse(bv.TableKV, e, &sonJobResult, isSite, codeSite) kvparse(bv.SpaceKV, e, &sonJobResult, isSite, codeSite) } } } if sonJobResult["name"] == nil { sonJobResult["name"] = j.Title } } //分包暂不参与选举 /* for k, tags := range e.Tag { L: for _, tag := range tags { if pkg.TableKV != nil { for key, val := range pkg.TableKV.Kv { if tag.Key == key { clearmap[k] = false var tmpval interface{} if len(e.ClearFn[k]) > 0 { data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content}) tmpval = data[0] } else { tmpval = val } sonJobResult[k] = tmpval if packagenum == 1 { field := &ju.ExtField{ Field: k, Code: "package", RuleText: "package", Type: "table", MatchType: "tag_string", ExtFrom: "package", Value: tmpval, Score: 0, } j.Result[k] = append(j.Result[k], field) } break L } } } if pkg.ColonKV != nil { for key, val := range pkg.ColonKV.Kv { if tag.Key == key { clearmap[k] = true var tmpval interface{} if len(e.ClearFn[k]) > 0 { data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content}) tmpval = data[0] } else { tmpval = val } sonJobResult[k] = tmpval if packagenum == 1 { field := &ju.ExtField{ Field: k, Code: "package", RuleText: "package", Type: "colon", MatchType: "tag_string", ExtFrom: "package", Value: tmpval, Score: 0, } j.Result[k] = append(j.Result[k], field) } break L } } } if pkg.SpaceKV != nil { for key, val := range pkg.SpaceKV.Kv { if tag.Key == key { clearmap[k] = true var tmpval interface{} if len(e.ClearFn[k]) > 0 { data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content}) tmpval = data[0] } else { tmpval = val } sonJobResult[k] = tmpval if packagenum == 1 { field := &ju.ExtField{ Field: k, Code: "package", RuleText: "package", Type: "space", MatchType: "tag_string", ExtFrom: "package", Value: tmpval, Score: 0, } j.Result[k] = append(j.Result[k], field) } break L } } } } } */ //如果有中标候选人排序,优先用第一中标候选人的中标单位和中标金额覆盖该包里面相应的字段的值 if pkg.WinnerOrder != nil && len(pkg.WinnerOrder) > 0 { firstWinnerOrder := pkg.WinnerOrder[0] if qu.ObjToString(sonJobResult["winner"]) == "" || (!pkg.Accuracy && qu.ObjToString(firstWinnerOrder["entname"]) != "" && qu.Int64All(firstWinnerOrder["sort"]) == 1) { sonJobResult["winner"] = firstWinnerOrder["entname"] } if qu.Float64All(sonJobResult["bidamount"]) == 0 || (!pkg.Accuracy && qu.Float64All(firstWinnerOrder["price"]) > 0 && qu.Int64All(firstWinnerOrder["sort"]) == 1) { sonJobResult["bidamount"] = firstWinnerOrder["price"] } } //log.Println(pkName, sonJobResult) sonJobResult["clear"] = clearmap packageResult[pkName] = sonJobResult } if len(packageResult) > 0 { j.PackageInfo = packageResult } } //extRegBackPack(j, e) }, func(err interface{}) { log.Println("PackageDetail err", err) }) } //清理分包信息 func extRegBackPack(j *ju.Job, e *ExtractTask) { defer qu.Catch() //正则清理 if j.CategorySecond == "" { for _, rc1 := range e.RuleCores[j.Category] { for _, rc := range rc1 { for pk, pack := range j.PackageInfo { clear, _ := pack["clear"].(map[string]interface{}) for k, val := range pack { if b, ok := clear[k].(bool); ok && b { if rc.Field == k { text := qu.ObjToString(val) for _, in := range rc.RuleBacks { if text != "" && !in.IsLua { text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace) } } pack[k] = text } } } j.PackageInfo[pk] = pack } } } } else { for _, rc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] { for _, rc := range rc1 { for pk, pack := range j.PackageInfo { clear, _ := pack["clear"].(map[string]interface{}) for k, val := range pack { if b, ok := clear[k].(bool); ok && b { if rc.Field == k { text := qu.ObjToString(val) for _, in := range rc.RuleBacks { if text != "" && !in.IsLua { text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace) } } pack[k] = text } } } j.PackageInfo[pk] = pack } } } } //通用正则清理 for _, in := range e.RuleBacks { for _, pack := range j.PackageInfo { for k, val := range pack { if in.Field == k { text := qu.ObjToString(val) if text != "" && !in.IsLua { text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace) } pack[k] = text } } } } //函数清理 for _, pack := range j.PackageInfo { for key, val := range pack { if reflect.TypeOf(val) != nil && (reflect.TypeOf(val).String() == "float64" || reflect.TypeOf(val).String() == "int64") { continue } else { lock.Lock() cfn := e.ClearFn[key] lock.Unlock() data := clear.DoClearFn(cfn, []interface{}{val, j.Content}) pack[key] = data[0] } } } //特殊属性的分包清理 for _, rc := range e.PkgRuleCores { for pk, pack := range j.PackageInfo { for k, val := range pack { if rc.Field == k { text := qu.ObjToString(val) for _, in := range rc.RuleBacks { if text != "" { if !in.IsLua { //正则 text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace) } else { //lua result := GetResultMapForLua(j) lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText} if j != nil { lua.Block = j.Block } extinfo := lua.RunScript("back") if extinfo["value"] != nil { text = qu.ObjToString(extinfo["value"]) } } } } pack[k] = text } } j.PackageInfo[pk] = pack } } }