|
@@ -121,7 +121,7 @@ func TimedTaskDatamap(days int,lasttime int64) *datamap {
|
|
|
|
|
|
|
|
|
func NewDatamap(days int, lastid string) *datamap {
|
|
|
- datelimit = qutil.Float64All(days * 86400)
|
|
|
+ datelimit = qutil.Float64All(days * 86400 * 2)
|
|
|
dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{},[]string{}, map[string]bool{}}
|
|
|
if lastid == "" {
|
|
|
return dm
|
|
@@ -239,7 +239,6 @@ func NewInfo(tmp map[string]interface{}) *Info {
|
|
|
|
|
|
//判重方法
|
|
|
func (d *datamap) check(info *Info) (b bool, source *Info, reasons string) {
|
|
|
-
|
|
|
reason := ""
|
|
|
keys := []string{}
|
|
|
d.lock.Lock()
|
|
@@ -256,6 +255,8 @@ func (d *datamap) check(info *Info) (b bool, source *Info, reasons string) {
|
|
|
keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
|
|
|
|
|
|
}
|
|
|
+
|
|
|
+
|
|
|
d.lock.Unlock()
|
|
|
L:
|
|
|
for _, k := range keys {
|
|
@@ -268,6 +269,9 @@ L:
|
|
|
if v.id == info.id { //正常重复
|
|
|
return false, v, ""
|
|
|
}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
if info.site != "" {//站点临时赋值
|
|
|
sitelock.Lock()
|
|
|
dict := SiteMap[info.site]
|
|
@@ -292,11 +296,13 @@ L:
|
|
|
}
|
|
|
if info.href != "" && info.href != v.href {
|
|
|
if v.title==info.title&&len([]rune(info.title)) >10 && isTheSameDay(info.publishtime,v.publishtime){
|
|
|
- reason = "同站点-href不同-标题相同"
|
|
|
- b = true
|
|
|
- source = v
|
|
|
- reasons = reason
|
|
|
- break L
|
|
|
+ if !againHrefRepeat(v, info) {//进行同站点二次判断
|
|
|
+ reason = "同站点-href不同-标题相同等"
|
|
|
+ b = true
|
|
|
+ source = v
|
|
|
+ reasons = reason
|
|
|
+ break L
|
|
|
+ }
|
|
|
}else {
|
|
|
continue
|
|
|
}
|
|
@@ -325,18 +331,16 @@ L:
|
|
|
}
|
|
|
if letter1==letter2 {
|
|
|
reason = reason + "标题关键词相等关系"
|
|
|
- if !againRepeat(v, info) {//继续二级金额判断
|
|
|
+ if !againRepeat(v, info) {//进行二级金额判断
|
|
|
b = true
|
|
|
source = v
|
|
|
reasons = reason
|
|
|
break L
|
|
|
- }else {
|
|
|
- if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
|
|
|
- //无包含关系-即不相等
|
|
|
- continue
|
|
|
- }else {
|
|
|
- //有包含关系走要素判重逻辑
|
|
|
- }
|
|
|
+ }
|
|
|
+ }else {
|
|
|
+ if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
|
|
|
+ //无包含关系-即不相等
|
|
|
+ continue
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -487,13 +491,19 @@ func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
|
|
|
|
|
|
|
func (d *datamap) update(t int64) {
|
|
|
- //每天0点清除历史数据
|
|
|
- d.keymap = d.GetLatelyFiveDay(t)
|
|
|
+
|
|
|
+ if TimingTask {
|
|
|
+ d.keymap = d.GetLatelyFiveDay(t)
|
|
|
+ }else {
|
|
|
+ //d.keymap = d.GetLatelyFiveDay(t)//测试数据采用
|
|
|
+ d.keymap = d.GetLatelyFiveDayDouble(t)
|
|
|
+ }
|
|
|
m := map[string]bool{}
|
|
|
for _, v := range d.keymap {
|
|
|
m[v] = true
|
|
|
}
|
|
|
all, all1 := 0, 0
|
|
|
+
|
|
|
for k, v := range d.data {
|
|
|
all += len(v)
|
|
|
if !m[k[:8]] {
|
|
@@ -521,6 +531,16 @@ func (d *datamap) GetLatelyFiveDay(t int64) []string {
|
|
|
return array
|
|
|
}
|
|
|
|
|
|
+func (d *datamap) GetLatelyFiveDayDouble(t int64) []string {//增量-两倍
|
|
|
+ array := make([]string, d.days*2)
|
|
|
+ now := time.Now()
|
|
|
+ for i := 0; i < d.days*2; i++ {
|
|
|
+ array[i] = now.Format(qutil.Date_yyyyMMdd)
|
|
|
+ now = now.AddDate(0, 0, -1)
|
|
|
+ }
|
|
|
+ return array
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
**************************
|
|
|
******** 以下为判重 ********
|
|
@@ -1077,7 +1097,51 @@ func contractRepeat_C(v *Info, info *Info) bool {
|
|
|
return false
|
|
|
}
|
|
|
|
|
|
-//再次金额判断
|
|
|
+//同站点再次判断
|
|
|
+func againHrefRepeat(v *Info, info *Info) bool {
|
|
|
+ if v.buyer == info.buyer {
|
|
|
+ if info.subtype == "招标" || info.subtype == "邀标" || info.subtype == "询价" ||
|
|
|
+ info.subtype == "竞谈" || info.subtype == "单一" || info.subtype == "竞价" ||
|
|
|
+ info.subtype == "变更" || info.subtype == "其他" {
|
|
|
+ //招标结果
|
|
|
+ if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
|
|
|
+ return true
|
|
|
+ }
|
|
|
+ } else if info.subtype == "中标" || info.subtype == "成交" || info.subtype == "废标" || info.subtype == "流标" {
|
|
|
+ //中标结果
|
|
|
+ if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
|
|
|
+ (deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
|
|
|
+ return true
|
|
|
+ }
|
|
|
+ } else if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
|
|
|
+ //合同
|
|
|
+ if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
|
|
|
+ return true
|
|
|
+ }
|
|
|
+ if (isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0) ||
|
|
|
+ (deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "") {
|
|
|
+ return true
|
|
|
+ }
|
|
|
+ if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
|
|
|
+ return true
|
|
|
+ }
|
|
|
+ if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
|
|
|
+ return true
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
|
|
|
+ return true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return false
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+//关键词再次金额判断
|
|
|
func againRepeat(v *Info, info *Info) bool {
|
|
|
//相同采购单位下
|
|
|
//if info.buyer != "" && v.buyer == info.buyer {
|