|
@@ -3,6 +3,7 @@ package main
|
|
import (
|
|
import (
|
|
"fmt"
|
|
"fmt"
|
|
"log"
|
|
"log"
|
|
|
|
+ "math"
|
|
qutil "qfw/util"
|
|
qutil "qfw/util"
|
|
"regexp"
|
|
"regexp"
|
|
"strings"
|
|
"strings"
|
|
@@ -72,7 +73,7 @@ func TimedTaskDatamap(days int,lasttime int64) *datamap {
|
|
n, continuSum := 0, 0
|
|
n, continuSum := 0, 0
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
//qutil.IntAll(tmp["dataging"]) == 1
|
|
//qutil.IntAll(tmp["dataging"]) == 1
|
|
- if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1||qutil.IntAll(tmp["dataging"]) == 1 {
|
|
|
|
|
|
+ if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 {
|
|
|
|
|
|
} else {
|
|
} else {
|
|
pt := tmp["publishtime"]
|
|
pt := tmp["publishtime"]
|
|
@@ -106,7 +107,7 @@ func TimedTaskDatamap(days int,lasttime int64) *datamap {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if n%50000 == 0 {
|
|
if n%50000 == 0 {
|
|
- log.Println("current 数据池:", n, continuSum)
|
|
|
|
|
|
+ log.Println("当前数据池:", n, continuSum)
|
|
}
|
|
}
|
|
tmp = make(map[string]interface{})
|
|
tmp = make(map[string]interface{})
|
|
}
|
|
}
|
|
@@ -132,12 +133,16 @@ func NewDatamap(days int, lastid string) *datamap {
|
|
"$lte": StringTOBsonId(lastid),
|
|
"$lte": StringTOBsonId(lastid),
|
|
}}
|
|
}}
|
|
log.Println("query", query)
|
|
log.Println("query", query)
|
|
- it := sess.DB(mgo.DbName).C(extract).Find(query).Sort("-_id").Iter()
|
|
|
|
|
|
+ sortName := "-_id"
|
|
|
|
+ if Is_Sort {
|
|
|
|
+ sortName = "-publishtime"
|
|
|
|
+ }
|
|
|
|
+ it := sess.DB(mgo.DbName).C(extract).Find(query).Sort(sortName).Iter()
|
|
now1 := int64(0)
|
|
now1 := int64(0)
|
|
n, continuSum := 0, 0
|
|
n, continuSum := 0, 0
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
- if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 {
|
|
|
|
- continuSum++
|
|
|
|
|
|
+ if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1{
|
|
|
|
+
|
|
} else {
|
|
} else {
|
|
pt := tmp["comeintime"]
|
|
pt := tmp["comeintime"]
|
|
if Is_Sort {
|
|
if Is_Sort {
|
|
@@ -151,6 +156,7 @@ func NewDatamap(days int, lastid string) *datamap {
|
|
now1 = pt_time
|
|
now1 = pt_time
|
|
}
|
|
}
|
|
if qutil.Float64All(now1-pt_time) < datelimit {
|
|
if qutil.Float64All(now1-pt_time) < datelimit {
|
|
|
|
+ continuSum++
|
|
info := NewInfo(tmp)
|
|
info := NewInfo(tmp)
|
|
dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
|
|
dkey := qutil.FormatDateWithObj(&pt, qutil.Date_yyyyMMdd)
|
|
k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
|
|
k := fmt.Sprintf("%s_%s_%s", dkey, info.subtype, info.area)
|
|
@@ -178,11 +184,11 @@ func NewDatamap(days int, lastid string) *datamap {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if n%5000 == 0 {
|
|
if n%5000 == 0 {
|
|
- log.Println("current n:", n, continuSum)
|
|
|
|
|
|
+ log.Println("当前 n:", n,"数量:" ,continuSum)
|
|
}
|
|
}
|
|
tmp = make(map[string]interface{})
|
|
tmp = make(map[string]interface{})
|
|
}
|
|
}
|
|
- log.Println("load data:", n)
|
|
|
|
|
|
+ log.Println("load data:", n,"总数:",continuSum)
|
|
return dm
|
|
return dm
|
|
}
|
|
}
|
|
|
|
|
|
@@ -278,14 +284,22 @@ L:
|
|
//前置条件1 - 站点相关
|
|
//前置条件1 - 站点相关
|
|
if info.site != "" && info.site == v.site {
|
|
if info.site != "" && info.site == v.site {
|
|
if info.href != "" && info.href == v.href {
|
|
if info.href != "" && info.href == v.href {
|
|
- reason = "href相同"
|
|
|
|
|
|
+ reason = "同站点-href相同"
|
|
b = true
|
|
b = true
|
|
source = v
|
|
source = v
|
|
reasons = reason
|
|
reasons = reason
|
|
break L
|
|
break L
|
|
}
|
|
}
|
|
if info.href != "" && info.href != v.href {
|
|
if info.href != "" && info.href != v.href {
|
|
- reason = "href不同-"
|
|
|
|
|
|
+ if v.title==info.title && isTheSameDay(info.publishtime,v.publishtime){
|
|
|
|
+ reason = "同站点-href不同-标题相同"
|
|
|
|
+ b = true
|
|
|
|
+ source = v
|
|
|
|
+ reasons = reason
|
|
|
|
+ break L
|
|
|
|
+ }else {
|
|
|
|
+ continue
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -309,15 +323,20 @@ L:
|
|
if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
|
|
if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
|
|
letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
|
|
letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
|
|
}
|
|
}
|
|
- if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
|
|
|
|
- continue
|
|
|
|
- }else {
|
|
|
|
- reason = reason + "标题关键词且包含关系"
|
|
|
|
|
|
+ if letter1==letter2 {
|
|
|
|
+ reason = reason + "标题关键词相等关系"
|
|
if !againRepeat(v, info) {//继续二级金额判断
|
|
if !againRepeat(v, info) {//继续二级金额判断
|
|
b = true
|
|
b = true
|
|
source = v
|
|
source = v
|
|
reasons = reason
|
|
reasons = reason
|
|
break L
|
|
break L
|
|
|
|
+ }else {
|
|
|
|
+ if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
|
|
|
|
+ //无包含关系-即不相等
|
|
|
|
+ continue
|
|
|
|
+ }else {
|
|
|
|
+ //有包含关系走要素判重逻辑
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
@@ -413,8 +432,25 @@ L:
|
|
}
|
|
}
|
|
//替换原始数据池
|
|
//替换原始数据池
|
|
func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
- ct := newData.comeintime
|
|
|
|
|
|
+ //删除数据池的老数据
|
|
|
|
+ ct_old := oldData.comeintime
|
|
if Is_Sort||TimingTask {
|
|
if Is_Sort||TimingTask {
|
|
|
|
+ ct_old = oldData.publishtime
|
|
|
|
+ }
|
|
|
|
+ dkey_old := qutil.FormatDateByInt64(&ct_old, qutil.Date_yyyyMMdd)
|
|
|
|
+ k_old := fmt.Sprintf("%s_%s_%s", dkey_old, oldData.subtype, oldData.area)
|
|
|
|
+ data_old := d.data[k_old]
|
|
|
|
+ for k, v := range data_old {
|
|
|
|
+ if v.id == oldData.id {//删除对应当前的老数据
|
|
|
|
+ data_old = append(data_old[:k], data_old[k+1:]...)
|
|
|
|
+ break
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ d.data[k_old] = data_old
|
|
|
|
+
|
|
|
|
+ //添加新的
|
|
|
|
+ ct := newData.comeintime
|
|
|
|
+ if Is_Sort ||TimingTask{
|
|
ct = newData.publishtime
|
|
ct = newData.publishtime
|
|
}
|
|
}
|
|
dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
|
|
dkey := qutil.FormatDateByInt64(&ct, qutil.Date_yyyyMMdd)
|
|
@@ -426,44 +462,12 @@ func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
d.data[k] = data
|
|
d.data[k] = data
|
|
if !d.keys[dkey] {
|
|
if !d.keys[dkey] {
|
|
d.keys[dkey] = true
|
|
d.keys[dkey] = true
|
|
|
|
+ d.update(ct)
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
- //遍历替换
|
|
|
|
- isReplace := false
|
|
|
|
- for k, v := range data {
|
|
|
|
- if v.id == oldData.id {
|
|
|
|
- data[k] = newData //同天_type_area 替换
|
|
|
|
- isReplace = true
|
|
|
|
- break
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- if !isReplace {
|
|
|
|
- //添加新数据 删除老数据
|
|
|
|
- data = append(data,newData)
|
|
|
|
- ct_old := oldData.comeintime
|
|
|
|
- if Is_Sort||TimingTask {
|
|
|
|
- ct_old = oldData.publishtime
|
|
|
|
- }
|
|
|
|
- dkey_old := qutil.FormatDateByInt64(&ct_old, qutil.Date_yyyyMMdd)
|
|
|
|
- k_old := fmt.Sprintf("%s_%s_%s", dkey_old, oldData.subtype, oldData.area)
|
|
|
|
- data_old := d.data[k_old]
|
|
|
|
- if len(data_old)==1 {
|
|
|
|
- delete(d.data ,k_old)
|
|
|
|
- } else {
|
|
|
|
- for k, v := range data_old {
|
|
|
|
- if v.id == oldData.id {
|
|
|
|
- //删除对应当前的
|
|
|
|
- data_old = append(data_old[:k], data_old[k+1:]...)
|
|
|
|
- break
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
- d.data[k_old] = data_old
|
|
|
|
- }
|
|
|
|
- }else {
|
|
|
|
- d.data[k] = data
|
|
|
|
- }
|
|
|
|
|
|
+ data = append(data, newData)
|
|
|
|
+ d.data[k] = data
|
|
}
|
|
}
|
|
-
|
|
|
|
//添加省
|
|
//添加省
|
|
isAreaExist :=false
|
|
isAreaExist :=false
|
|
for _,v:= range d.areakeys {
|
|
for _,v:= range d.areakeys {
|
|
@@ -477,6 +481,7 @@ func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
d.areakeys = areaArr
|
|
d.areakeys = areaArr
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+
|
|
d.lock.Unlock()
|
|
d.lock.Unlock()
|
|
}
|
|
}
|
|
|
|
|
|
@@ -554,7 +559,7 @@ func dealWithSpecialPhrases(str1 string,str2 string) (string,string) {
|
|
return newStr1,newStr2
|
|
return newStr1,newStr2
|
|
}
|
|
}
|
|
//关键词数量v
|
|
//关键词数量v
|
|
-func dealWithSpecialWordNumber(info *Info, v*Info) int {
|
|
|
|
|
|
+func dealWithSpecialWordNumber(info*Info,v*Info) int {
|
|
okNum:=0
|
|
okNum:=0
|
|
if info.titleSpecialWord || info.specialWord {
|
|
if info.titleSpecialWord || info.specialWord {
|
|
okNum++
|
|
okNum++
|
|
@@ -615,7 +620,7 @@ func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
|
|
reason = reason + "---招标类:预算"
|
|
reason = reason + "---招标类:预算"
|
|
return true,reason
|
|
return true,reason
|
|
}
|
|
}
|
|
- if info.bidopentime != 0 && info.bidopentime == v.bidopentime{//开标时间
|
|
|
|
|
|
+ if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
|
|
reason = reason + "---招标类:开标时间"
|
|
reason = reason + "---招标类:开标时间"
|
|
return true,reason
|
|
return true,reason
|
|
}
|
|
}
|
|
@@ -639,7 +644,7 @@ func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
|
|
reason = reason + "---合同类:预算"
|
|
reason = reason + "---合同类:预算"
|
|
return true,reason
|
|
return true,reason
|
|
}
|
|
}
|
|
- if info.bidopentime != 0 && info.bidopentime == v.bidopentime{//开标时间
|
|
|
|
|
|
+ if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
|
|
reason = reason + "---合同类:开标时间"
|
|
reason = reason + "---合同类:开标时间"
|
|
return true,reason
|
|
return true,reason
|
|
}
|
|
}
|
|
@@ -661,7 +666,7 @@ func judgeLowQualityData(v *Info, info *Info, reason string) (bool, string) {
|
|
reason = reason + "---类别空-招标类:预算"
|
|
reason = reason + "---类别空-招标类:预算"
|
|
return true,reason
|
|
return true,reason
|
|
}
|
|
}
|
|
- if info.bidopentime != 0 && info.bidopentime == v.bidopentime{//开标时间
|
|
|
|
|
|
+ if info.bidopentime != 0 && info.bidopentime==v.bidopentime{//开标时间
|
|
reason = reason + "---类别空-招标类:开标时间"
|
|
reason = reason + "---类别空-招标类:开标时间"
|
|
return true,reason
|
|
return true,reason
|
|
}
|
|
}
|
|
@@ -834,7 +839,7 @@ func tenderRepeat_A(v *Info, info *Info, reason string) (bool, string) {
|
|
p4 = true
|
|
p4 = true
|
|
}
|
|
}
|
|
if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
|
|
if v.bidopentime != 0 && v.bidopentime == info.bidopentime {
|
|
- ss = ss + "p9-开标时间-"
|
|
|
|
|
|
+ ss = ss + "p9-开标时间相同-"
|
|
p9 = true
|
|
p9 = true
|
|
}
|
|
}
|
|
if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
|
|
if v.bidopenaddress != "" && v.bidopenaddress == info.bidopenaddress {
|
|
@@ -904,16 +909,16 @@ func tenderRepeat_B(v *Info, info *Info, reason string) (bool, string) {
|
|
func tenderRepeat_C(v *Info, info *Info) bool {
|
|
func tenderRepeat_C(v *Info, info *Info) bool {
|
|
|
|
|
|
if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
|
|
if v.budget != 0 && info.budget != 0 && v.budget != info.budget {
|
|
-
|
|
|
|
return true
|
|
return true
|
|
}
|
|
}
|
|
//原始地址...
|
|
//原始地址...
|
|
if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
|
|
if v.buyer != "" && info.buyer != "" && v.buyer != info.buyer {
|
|
return true
|
|
return true
|
|
}
|
|
}
|
|
- //if v.bidopentime != 0 && info.bidopentime != 0 && v.bidopentime != info.bidopentime {
|
|
|
|
- // return true
|
|
|
|
- //}
|
|
|
|
|
|
+
|
|
|
|
+ if v.bidopentime != 0 && info.bidopentime != 0 && isBidopentimeInterval(info.bidopentime,v.bidopentime) {
|
|
|
|
+ return true
|
|
|
|
+ }
|
|
//if v.bidopenaddress != "" && info.bidopenaddress != "" && v.bidopenaddress != info.bidopenaddress {
|
|
//if v.bidopenaddress != "" && info.bidopenaddress != "" && v.bidopenaddress != info.bidopenaddress {
|
|
// return true
|
|
// return true
|
|
//}
|
|
//}
|
|
@@ -1093,7 +1098,10 @@ func againRepeat(v *Info, info *Info) bool {
|
|
return true
|
|
return true
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
-
|
|
|
|
|
|
+ //预算金额满足条件
|
|
|
|
+ if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
|
|
|
|
+ return true
|
|
|
|
+ }
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -1126,8 +1134,40 @@ func isBidWinningAmount(f1 float64 ,f2 float64) bool {
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
+//开标时间区间为一天
|
|
|
|
+func isBidopentimeInterval(i1 int64 ,i2 int64) bool {
|
|
|
|
+ if i1==0||i2==0 {
|
|
|
|
+ return false
|
|
|
|
+ }
|
|
|
|
+ //不在同一天-或者同一天间隔超过六小时,属于不相等返回true
|
|
|
|
+ timeOne,timeTwo:=i1,i2
|
|
|
|
+ day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
|
|
|
|
+ day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
|
|
|
|
+ if day1==day2 {
|
|
|
|
+ //是否间隔超过六小时
|
|
|
|
+ if math.Abs(float64(i1-i2)) >21600.0 {
|
|
|
|
+ return true
|
|
|
|
+ }else {
|
|
|
|
+ return false
|
|
|
|
+ }
|
|
|
|
+ }else {
|
|
|
|
+ return true
|
|
|
|
+ }
|
|
|
|
+}
|
|
|
|
|
|
-
|
|
|
|
|
|
+//开标时间区间为一天
|
|
|
|
+func isTheSameDay(i1 int64 ,i2 int64) bool {
|
|
|
|
+ if i1==0||i2==0 {
|
|
|
|
+ return false
|
|
|
|
+ }
|
|
|
|
+ timeOne,timeTwo:=i1,i2
|
|
|
|
+ day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
|
|
|
|
+ day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
|
|
|
|
+ if day1==day2 {
|
|
|
|
+ return true
|
|
|
|
+ }
|
|
|
|
+ return false
|
|
|
|
+}
|
|
|
|
|
|
|
|
|
|
|
|
|