|
@@ -216,9 +216,7 @@ func RunExtractTask(taskId string) {
|
|
|
//if qu.ObjToString(v["sensitive"]) != "" { //去除含敏感词数据
|
|
|
// continue
|
|
|
//}
|
|
|
- if qu.ObjToString(v["spidercode"]) == "a_gjggzyjypt_gcjs_kbjl" { //临时
|
|
|
- continue
|
|
|
- }
|
|
|
+ //根据标题判断是否抽取
|
|
|
b := IsExtract("title", qu.ObjToString(v["title"]), "")
|
|
|
if !b {
|
|
|
continue
|
|
@@ -328,6 +326,17 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
|
|
|
if (*toMap)["jsoncontent"] != nil {
|
|
|
delete(*toMap, "jsoncontent")
|
|
|
}
|
|
|
+ for k,v := range *toMap{
|
|
|
+ if _,ok := v.(float64);ok{
|
|
|
+ continue
|
|
|
+ }else if _,ok := v.(int64);ok{
|
|
|
+ continue
|
|
|
+ }else if _,ok2 := v.(string);ok2{
|
|
|
+ continue
|
|
|
+ }else {
|
|
|
+ delete(*toMap,k)
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
j = &ju.Job{
|
|
|
SourceMid: qu.BsonIdToSId(doc["_id"]),
|
|
@@ -459,12 +468,21 @@ func file2text(doc *map[string]interface{}) {
|
|
|
|
|
|
//抽取
|
|
|
func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
|
|
|
+
|
|
|
e.ExtractDetail(j, isSite, j.SpiderCode)
|
|
|
if jf != nil && jf.IsFile {
|
|
|
- e.ExtractFile(jf, isSite, j.SpiderCode)
|
|
|
- for tmpk, _ := range jf.Result {
|
|
|
+ e.ExtractDetail(jf, isSite, j.SpiderCode)
|
|
|
+ for tmpk, xs := range jf.Result {
|
|
|
if len(j.Result[tmpk]) == 0 {
|
|
|
+ if tmpk == "budget" || tmpk == "bidamount" {
|
|
|
+ for _, v := range xs {
|
|
|
+ if fv, ok := v.Value.(float64); ok && fv > 100 && fv < 50000000000 {
|
|
|
+ j.Result[tmpk] = append(j.Result[tmpk], v)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } else {
|
|
|
j.Result[tmpk] = append(j.Result[tmpk], jf.Result[tmpk]...)
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
if len(j.Winnerorder) == 0 && jf.Winnerorder != nil && len(jf.Winnerorder) > 0 {
|
|
@@ -1455,6 +1473,12 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo, vc *RuleCore) {
|
|
|
return
|
|
|
}
|
|
|
text := qu.ObjToString(v.Value)
|
|
|
+ if v.Field == "bidamount" || v.Field == "budget" {
|
|
|
+ if strings.Contains(qu.ObjToString(v.SourceValue), "费率") {
|
|
|
+ j.Result[in.Field][k].IsTrue = false
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ }
|
|
|
if text != "" {
|
|
|
text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
|
|
|
}
|
|
@@ -1825,7 +1849,13 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
if len(j.Winnerorder) > 0 { //候选人信息
|
|
|
for i, v := range j.Winnerorder {
|
|
|
if v["price"] != nil {
|
|
|
- j.Winnerorder[i]["price"] = clear.ObjToMoney([]interface{}{v["price"], ""}, j.SpiderCode)[0] }
|
|
|
+ tmpPrice := clear.ObjToMoney([]interface{}{v["price"], ""}, j.SpiderCode, j.IsClearnMoney)
|
|
|
+ if tmpPrice[len(tmpPrice)-1].(bool) {
|
|
|
+ j.Winnerorder[i]["price"] = tmpPrice[0]
|
|
|
+ } else {
|
|
|
+ delete(j.Winnerorder[i], "price")
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
tmp["winnerorder"] = j.Winnerorder
|
|
|
}
|
|
@@ -1839,12 +1869,9 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
if v.Score > -1 {
|
|
|
ffield[v.Field] = v.Value
|
|
|
if tmp[v.Field] == nil {
|
|
|
- if v.Field == "budget" || v.Field == "bidamount" {
|
|
|
- if fv, ok := v.Value.(float64); ok && fv > 100 && fv < 50000000000 {
|
|
|
- tmp[v.Field] = v.Value
|
|
|
- }
|
|
|
- } else {
|
|
|
+ if (v.Field == "bidamount" || v.Field == "budget") && v.IsTrue {
|
|
|
tmp[v.Field] = v.Value
|
|
|
+ break
|
|
|
}
|
|
|
}
|
|
|
break
|
|
@@ -1946,7 +1973,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
kvtext.WriteString(jv_k)
|
|
|
kvtext.WriteString(":")
|
|
|
kvtext.WriteString(jv_vv.Value)
|
|
|
- kvtext.WriteString(" ")
|
|
|
+ kvtext.WriteString("\n")
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -1975,21 +2002,13 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
tmp["projectname"] = j.Title
|
|
|
}
|
|
|
tmp["repeat"] = 0
|
|
|
+ if ju.Ffield {
|
|
|
+ if len(ffield) > 0 {
|
|
|
+ tmp["ffield"] = ffield
|
|
|
+ }
|
|
|
+ }
|
|
|
if e.TaskInfo.TestColl == "" {
|
|
|
if len(tmp) > 0 { //保存抽取结果
|
|
|
- /* if len(e.SiteFields) <= 0 {
|
|
|
- //for field, _ := range e.Fields {
|
|
|
- // if tmp[field] == nil && {
|
|
|
- // tmp[field] = "" //覆盖之前版本数据
|
|
|
- // }
|
|
|
- //}
|
|
|
- } else {
|
|
|
- //for field, _ := range e.SiteFields {
|
|
|
- // if tmp[field] == nil &&{
|
|
|
- // tmp[field] = "" //覆盖之前版本数据
|
|
|
- // }
|
|
|
- //}
|
|
|
- }*/
|
|
|
tmparr := []map[string]interface{}{
|
|
|
map[string]interface{}{
|
|
|
"_id": qu.StringTOBsonId(_id),
|
|
@@ -2018,19 +2037,6 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
}
|
|
|
} else { //测试结果
|
|
|
delete(tmp, "_id")
|
|
|
- //auxinfo := auxInfo(j)
|
|
|
- //auxinfof := auxInfo(jf)
|
|
|
- //if len(auxinfo) > 0 {
|
|
|
- // tmp["fieldall"] = auxinfo
|
|
|
- //}
|
|
|
- //if len(auxinfof) > 0 {
|
|
|
- // tmp["fieldallf"] = auxinfof
|
|
|
- //}
|
|
|
- if ju.Ffield {
|
|
|
- if len(ffield) > 0 {
|
|
|
- tmp["ffield"] = ffield
|
|
|
- }
|
|
|
- }
|
|
|
delete(tmp, "fieldall")
|
|
|
if len(j.BlockPackage) > 0 { //分包详情
|
|
|
if len(j.BlockPackage) > 10 {
|
|
@@ -2410,7 +2416,17 @@ func resetWinnerorder(j *ju.Job) {
|
|
|
} else if len(bidamounts) > 0 {
|
|
|
j.Result["bidamount"] = append(j.Result["bidamount"], bidamounts...)
|
|
|
}
|
|
|
-
|
|
|
+ if j.Result["winner"] == nil && len(j.Winnerorder) > 0 && qu.Float64All(j.Winnerorder[0]["sort"]) == 1 {
|
|
|
+ winners = append(winners, &ju.ExtField{Code: "winnerorder", Field: "winner", ExtFrom: "j.Winnerorder", Value: j.Winnerorder[0]["entname"], Score: 0.5})
|
|
|
+ j.Result["winner"] = winners
|
|
|
+ if j.Winnerorder[0]["price"] != nil {
|
|
|
+ tmpPrice := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"], ""}, j.SpiderCode, j.IsClearnMoney)
|
|
|
+ if tmpPrice[len(tmpPrice)-1].(bool) {
|
|
|
+ bidamounts = append(bidamounts, &ju.ExtField{Code: "winnerorder", Field: "bidamount", ExtFrom: "j.Winnerorder", SourceValue: j.Winnerorder[0]["price"], Value: tmpPrice[0], Score: 2.5, IsTrue: true})
|
|
|
+ }
|
|
|
+ j.Result["bidamount"] = bidamounts
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|
|
|
func RemoveReplicaSliceString(slc []string) []string {
|
|
|
result := make([]string, 0)
|