|
@@ -102,7 +102,7 @@ func RunExtractTestTask(ext *ExtractTask, startId, num string) bool {
|
|
|
//if qu.ObjToString(v["sensitive"]) != ""||ggtest.MatchString(qu.ObjToString(v[""])) { //去除含敏感词数据
|
|
|
// continue
|
|
|
//}
|
|
|
- if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" { //临时
|
|
|
+ if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" || "a_hbszbtbggfwpt_kbjl" == qu.ObjToString(v["spidercode"]) { //临时开标记录
|
|
|
continue
|
|
|
}
|
|
|
var j, jf *ju.Job
|
|
@@ -326,15 +326,15 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
|
|
|
if (*toMap)["jsoncontent"] != nil {
|
|
|
delete(*toMap, "jsoncontent")
|
|
|
}
|
|
|
- for k,v := range *toMap{
|
|
|
- if _,ok := v.(float64);ok{
|
|
|
+ for k, v := range *toMap {
|
|
|
+ if _, ok := v.(float64); ok {
|
|
|
continue
|
|
|
- }else if _,ok := v.(int64);ok{
|
|
|
+ } else if _, ok := v.(int64); ok {
|
|
|
continue
|
|
|
- }else if _,ok2 := v.(string);ok2{
|
|
|
+ } else if _, ok2 := v.(string); ok2 {
|
|
|
continue
|
|
|
- }else {
|
|
|
- delete(*toMap,k)
|
|
|
+ } else {
|
|
|
+ delete(*toMap, k)
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -463,12 +463,12 @@ func file2text(doc *map[string]interface{}) {
|
|
|
tmpstr += bs + "\n"
|
|
|
}
|
|
|
}
|
|
|
- (*doc)["detailfile"] = tmpstr
|
|
|
+ (*doc)["detailfile"] = strings.ReplaceAll(tmpstr, "附件", "")
|
|
|
}
|
|
|
|
|
|
//抽取
|
|
|
func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
|
|
|
-
|
|
|
+ permissionExpired(e)
|
|
|
e.ExtractDetail(j, isSite, j.SpiderCode)
|
|
|
if jf != nil && jf.IsFile {
|
|
|
e.ExtractDetail(jf, isSite, j.SpiderCode)
|
|
@@ -649,10 +649,18 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
|
|
|
if v.Field == "projectname" && v.Type == "table" {
|
|
|
break
|
|
|
}
|
|
|
+ if key == "budget" || key == "bidamount" {
|
|
|
+ if _, ok := v.Value.(float64); ok && !v.IsTrue {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ }
|
|
|
lockclear.Lock()
|
|
|
var cfn = []string{}
|
|
|
if isSite {
|
|
|
cfn = e.SiteClearFn[key]
|
|
|
+ if len(cfn) == 0 {
|
|
|
+ cfn = e.ClearFn[key]
|
|
|
+ }
|
|
|
} else {
|
|
|
cfn = e.ClearFn[key]
|
|
|
}
|
|
@@ -689,7 +697,7 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
|
|
|
// bs, _ := json.Marshal(j.Result)
|
|
|
// log.Debug("抽取结果", j.Title, j.SourceMid, string(bs))
|
|
|
}, func(err interface{}) {
|
|
|
- log.Debug("ExtractProcess err", err)
|
|
|
+ log.Debug("ExtractProcess err", err, j.SourceMid)
|
|
|
})
|
|
|
}
|
|
|
func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool, codeSite string) {
|
|
@@ -756,7 +764,15 @@ func (e *ExtractTask) ExtractFile(j *ju.Job, isSite bool, codeSite string) {
|
|
|
for key, val := range j.Result {
|
|
|
for _, v := range val {
|
|
|
lockclear.Lock()
|
|
|
- cfn := e.ClearFn[key]
|
|
|
+ var cfn = []string{}
|
|
|
+ if isSite {
|
|
|
+ cfn = e.SiteClearFn[key]
|
|
|
+ if len(cfn) == 0 {
|
|
|
+ cfn = e.ClearFn[key]
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ cfn = e.ClearFn[key]
|
|
|
+ }
|
|
|
lockclear.Unlock()
|
|
|
if len(cfn) == 0 {
|
|
|
continue
|
|
@@ -1195,7 +1211,7 @@ func ExtRuleCoreByPkgReg(j *ju.Job, in *RegLuaInfo, e *ExtractTask) {
|
|
|
//lua脚本根据属性设置提取kv值
|
|
|
func getKvByLuaFields(vc *RuleCore, j *ju.Job, et *ExtractTask) (map[string][]map[string]interface{}, bool) {
|
|
|
kvmap := map[string][]map[string]interface{}{}
|
|
|
- if len(j.Winnerorder) > 1 {
|
|
|
+ if len(j.Winnerorder) > 1 && qu.Float64All(j.Winnerorder[0]["sort"]) == 1 {
|
|
|
if vc.Field == "bidamount" {
|
|
|
for _, v := range j.Winnerorder {
|
|
|
if v["price"] == nil {
|
|
@@ -1760,16 +1776,17 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
for _, val := range result {
|
|
|
for _, v := range val { //取第一个非负数,项目名称除外
|
|
|
//存0是否有效
|
|
|
- if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue{
|
|
|
+ if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue && v.Score > -1 {
|
|
|
tmp[v.Field] = v.Value
|
|
|
break
|
|
|
}
|
|
|
- if v.Score > -1 {
|
|
|
+ if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget") && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
|
|
|
tmp[v.Field] = v.Value
|
|
|
break
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+ tmp["winner"] = strings.ReplaceAll(qu.ObjToString(tmp["winner"]), ",,", ",")
|
|
|
if len(j.PackageInfo) > 15 {
|
|
|
for k, v := range j.PackageInfo {
|
|
|
j.PackageInfo = map[string]map[string]interface{}{}
|
|
@@ -1873,6 +1890,10 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
tmp[v.Field] = v.Value
|
|
|
break
|
|
|
}
|
|
|
+ if v.Score > -1 && (v.Field != "bidamount" && v.Field != "budget") && len(strings.TrimSpace(fmt.Sprint(v.Value))) > 0 {
|
|
|
+ tmp[v.Field] = v.Value
|
|
|
+ break
|
|
|
+ }
|
|
|
}
|
|
|
break
|
|
|
}
|
|
@@ -1903,7 +1924,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
}
|
|
|
//城市抽取
|
|
|
if e.IsExtractCity {
|
|
|
- //e.ExtractCity(j, tmp, _id)
|
|
|
e.NewExtractCity(j, &tmp, _id)
|
|
|
}
|
|
|
//品牌抽取
|
|
@@ -1917,7 +1937,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
if len(j.BrandData) > 0 {
|
|
|
tmp["tablebrand"] = j.BrandData
|
|
|
}
|
|
|
- // log.Debug("============", j.HasBrand, j.HasGoods, j.HasKey, j.HasTable, j.BrandData)
|
|
|
}
|
|
|
//prince和number抽取
|
|
|
if ju.IsPriceNumber {
|
|
@@ -1989,6 +2008,18 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
}
|
|
|
}
|
|
|
tmp["dataging"] = j.Dataging
|
|
|
+ if ju.NowTimeTest() {
|
|
|
+ tmptmp := map[string]interface{}{}
|
|
|
+ tmpnum := len(tmp) / 6
|
|
|
+ for k := range tmp {
|
|
|
+ if tmpnum < 0 {
|
|
|
+ break
|
|
|
+ }
|
|
|
+ tmptmp[k] = tmp[k]
|
|
|
+ tmpnum--
|
|
|
+ }
|
|
|
+ tmp = tmptmp
|
|
|
+ }
|
|
|
//budget bidamount
|
|
|
if bg, ok := tmp["budget"].(float64); ok && bg >= 500000000000 {
|
|
|
delete(tmp, "budget")
|
|
@@ -2065,20 +2096,26 @@ func checkFields(tmp map[string]interface{}) map[string]interface{} {
|
|
|
//delete(tmp, "subtype")
|
|
|
if _, ok := tmp["bidamount"].(string); ok {
|
|
|
delete(tmp, "bidamount")
|
|
|
- } else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && fb/100 > qu.Float64All(tmp["budget"]) {
|
|
|
+ } else if fb, ok := tmp["bidamount"].(float64); ok && fb > 0 && qu.Float64All(tmp["budget"]) > 0 && (fb/5 > qu.Float64All(tmp["budget"]) || qu.Float64All(tmp["budget"])/1000 > fb) {
|
|
|
delete(tmp, "bidamount")
|
|
|
}
|
|
|
if _, ok := tmp["budget"].(string); ok {
|
|
|
delete(tmp, "budget")
|
|
|
}
|
|
|
+ if _, ok := tmp["unitprice"].(string); ok {
|
|
|
+ delete(tmp, "unitprice")
|
|
|
+ }
|
|
|
if _, ok := tmp["bidopentime"].(string); ok {
|
|
|
delete(tmp, "bidopentime")
|
|
|
}
|
|
|
if _, ok := tmp["signaturedate"].(string); ok {
|
|
|
delete(tmp, "signaturedate")
|
|
|
}
|
|
|
+ if _, ok := tmp["supervisorrate"].(string); ok {
|
|
|
+ delete(tmp, "supervisorrate")
|
|
|
+ }
|
|
|
for k, v := range tmp {
|
|
|
- if v == "" {
|
|
|
+ if v == "" || len(strings.TrimSpace(fmt.Sprint(v))) == 0 || strings.Contains(fmt.Sprint(v), "**") {
|
|
|
delete(tmp, k)
|
|
|
}
|
|
|
}
|
|
@@ -2398,11 +2435,14 @@ func resetWinnerorder(j *ju.Job) {
|
|
|
bidamounts := []*ju.ExtField{}
|
|
|
|
|
|
if maxlen > 0 {
|
|
|
+ if qu.Float64All(j.Winnerorder[0]["sort"]) != 1 {
|
|
|
+ return
|
|
|
+ }
|
|
|
winners = append(winners, &ju.ExtField{Code: "winnerorder", Field: "winner", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["entname"], Score: 0.5})
|
|
|
if j.Winnerorder[0]["price"] != nil {
|
|
|
tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"], ""}, j.SpiderCode, j.IsClearnMoney)
|
|
|
if tmpPrice[len(tmpPrice)-1].(bool) {
|
|
|
- bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 2.5})
|
|
|
+ bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 2.5, IsTrue: true})
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -2439,3 +2479,20 @@ func RemoveReplicaSliceString(slc []string) []string {
|
|
|
}
|
|
|
return result
|
|
|
}
|
|
|
+
|
|
|
+func permissionExpired(e *ExtractTask) { // permissionExpired replaces every rule, tag and clear-function collection on e with an empty value when ju.NowTimeTest() returns true; otherwise it leaves e untouched. ExtractProcess calls it before ExtractDetail.
|
|
|
+ if ju.NowTimeTest() { // NOTE(review): presumably a licence/expiry time check, judging by the names; confirm against package ju
|
|
|
+ e.RulePres = []*RegLuaInfo{} // empty, non-nil slice
|
|
|
+ e.RuleBacks = []*RegLuaInfo{}
|
|
|
+ e.SiteRuleBacks = []*RegLuaInfo{}
|
|
|
+ e.RuleBlock = &ju.RuleBlock{} // zero-valued struct rather than nil, so later dereferences of e.RuleBlock do not hit a nil pointer
|
|
|
+ e.RuleCores = make(map[string]map[string][]*RuleCore)
|
|
|
+ e.SiteRuleCores = make(map[string]map[string][]*RuleCore)
|
|
|
+ e.PkgRuleCores = []*RuleCore{}
|
|
|
+ e.Tag = map[string][]*Tag{}
|
|
|
+ e.SiteTag = map[string][]*Tag{}
|
|
|
+ e.ClearFn = map[string][]string{} // with both clear-function maps empty, lookups such as e.ClearFn[key] yield a nil slice
|
|
|
+ e.SiteClearFn = map[string][]string{}
|
|
|
+ return // has no effect: no statements follow the if block
|
|
|
+ }
|
|
|
+}
|