|
@@ -2,6 +2,7 @@ package main
|
|
|
|
|
|
import (
|
|
import (
|
|
"fmt"
|
|
"fmt"
|
|
|
|
+ "go.mongodb.org/mongo-driver/bson/primitive"
|
|
"log"
|
|
"log"
|
|
qutil "qfw/util"
|
|
qutil "qfw/util"
|
|
"reflect"
|
|
"reflect"
|
|
@@ -12,32 +13,31 @@ import (
|
|
)
|
|
)
|
|
|
|
|
|
// Info is the flattened in-memory form of one record taking part in
// duplicate detection. NewInfo builds it from a raw document map.
type Info struct {
	id               string   // record id
	title            string   // title
	spidercode       string   // spider (crawler) code
	area             string   // province
	city             string   // city
	subtype          string   // information type
	buyer            string   // purchasing entity
	agency           string   // agency
	winner           string   // winning bidder
	budget           float64  // budget amount
	bidamount        float64  // winning bid amount
	projectname      string   // project name
	projectcode      string   // project code
	contractnumber   string   // contract number
	publishtime      int64    // publish time
	comeintime       int64    // time the record entered storage
	bidopentime      int64    // bid opening time
	bidopenaddress   string   // bid opening location
	site             string   // site
	href             string   // URL of the body text
	repeatid         string   // duplicate id
	titleSpecialWord bool     // title special word (FilterRegTitle_0/_1/_2 matched the title)
	specialWord      bool     // special word for the second-pass check (FilterRegTitle matched the title)
	is_site          bool     // site city flag; set when area/city are taken from SiteMap
	repeat_ids       []string // every duplicate id recorded for this record
}
|
|
|
|
|
|
@@ -46,19 +46,19 @@ var sitelock sync.Mutex //锁
|
|
|
|
|
|
//一般数据判重
|
|
//一般数据判重
|
|
type datamap struct {
|
|
type datamap struct {
|
|
- lock sync.Mutex //锁
|
|
|
|
- days int //保留几天数据
|
|
|
|
- data map[string][]*Info
|
|
|
|
- keymap []string
|
|
|
|
|
|
+ lock sync.Mutex //锁
|
|
|
|
+ days int //保留几天数据
|
|
|
|
+ data map[string][]*Info
|
|
|
|
+ keymap []string
|
|
areakeys []string
|
|
areakeys []string
|
|
- keys map[string]bool
|
|
|
|
|
|
+ keys map[string]bool
|
|
}
|
|
}
|
|
|
|
|
|
//历史~存量
|
|
//历史~存量
|
|
-func TimedTaskDatamap(days int,lasttime int64,numIndex int) *datamap {
|
|
|
|
|
|
+func TimedTaskDatamap(days int, lasttime int64, numIndex int) *datamap {
|
|
datelimit = qutil.Float64All(days * 86400)
|
|
datelimit = qutil.Float64All(days * 86400)
|
|
- dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}, []string{},map[string]bool{}}
|
|
|
|
- if lasttime <0 {
|
|
|
|
|
|
+ dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}, []string{}, map[string]bool{}}
|
|
|
|
+ if lasttime < 0 {
|
|
log.Println("数据池空数据")
|
|
log.Println("数据池空数据")
|
|
return dm
|
|
return dm
|
|
}
|
|
}
|
|
@@ -73,13 +73,13 @@ func TimedTaskDatamap(days int,lasttime int64,numIndex int) *datamap {
|
|
n, continuSum := 0, 0
|
|
n, continuSum := 0, 0
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
if n%10000 == 0 {
|
|
if n%10000 == 0 {
|
|
- log.Println("当前 n:", n,"数量:" ,continuSum,tmp["_id"],tmp["publishtime"])
|
|
|
|
|
|
+ log.Println("当前 n:", n, "数量:", continuSum, tmp["_id"], tmp["publishtime"])
|
|
}
|
|
}
|
|
if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 ||
|
|
if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 ||
|
|
qutil.IntAll(tmp["dataging"]) == 1 {
|
|
qutil.IntAll(tmp["dataging"]) == 1 {
|
|
|
|
|
|
} else {
|
|
} else {
|
|
- if fmt.Sprint(reflect.TypeOf(tmp["publishtime"]))=="string" {
|
|
|
|
|
|
+ if fmt.Sprint(reflect.TypeOf(tmp["publishtime"])) == "string" {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
pt := tmp["publishtime"]
|
|
pt := tmp["publishtime"]
|
|
@@ -101,15 +101,15 @@ func TimedTaskDatamap(days int,lasttime int64,numIndex int) *datamap {
|
|
dm.data[k] = data
|
|
dm.data[k] = data
|
|
dm.keys[dkey] = true
|
|
dm.keys[dkey] = true
|
|
//添加省
|
|
//添加省
|
|
- isAreaExist :=false
|
|
|
|
- for _,v:= range dm.areakeys {
|
|
|
|
- if v==info.area {
|
|
|
|
|
|
+ isAreaExist := false
|
|
|
|
+ for _, v := range dm.areakeys {
|
|
|
|
+ if v == info.area {
|
|
isAreaExist = true
|
|
isAreaExist = true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if !isAreaExist {
|
|
if !isAreaExist {
|
|
areaArr := dm.areakeys
|
|
areaArr := dm.areakeys
|
|
- areaArr = append(areaArr,info.area)
|
|
|
|
|
|
+ areaArr = append(areaArr, info.area)
|
|
dm.areakeys = areaArr
|
|
dm.areakeys = areaArr
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
@@ -120,7 +120,7 @@ func TimedTaskDatamap(days int,lasttime int64,numIndex int) *datamap {
|
|
tmp = make(map[string]interface{})
|
|
tmp = make(map[string]interface{})
|
|
}
|
|
}
|
|
|
|
|
|
- log.Printf("第%d组:数据池构建完成:%d秒,%d个\n",numIndex ,int(time.Now().Unix())-start, n)
|
|
|
|
|
|
+ log.Printf("第%d组:数据池构建完成:%d秒,%d个\n", numIndex, int(time.Now().Unix())-start, n)
|
|
|
|
|
|
return dm
|
|
return dm
|
|
}
|
|
}
|
|
@@ -128,7 +128,7 @@ func TimedTaskDatamap(days int,lasttime int64,numIndex int) *datamap {
|
|
//增量
|
|
//增量
|
|
func NewDatamap(days int, lastid string) *datamap {
|
|
func NewDatamap(days int, lastid string) *datamap {
|
|
datelimit = qutil.Float64All(days * 86400 * 2)
|
|
datelimit = qutil.Float64All(days * 86400 * 2)
|
|
- dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{},[]string{}, map[string]bool{}}
|
|
|
|
|
|
+ dm := &datamap{sync.Mutex{}, days, map[string][]*Info{}, []string{}, []string{}, map[string]bool{}}
|
|
if lastid == "" {
|
|
if lastid == "" {
|
|
log.Println("不构建数据池")
|
|
log.Println("不构建数据池")
|
|
return dm
|
|
return dm
|
|
@@ -141,7 +141,7 @@ func NewDatamap(days int, lastid string) *datamap {
|
|
}}
|
|
}}
|
|
log.Println("query", query)
|
|
log.Println("query", query)
|
|
it := sess.DB(data_mgo.DbName).C(extract).Find(query).Sort("-publishtime").Iter()
|
|
it := sess.DB(data_mgo.DbName).C(extract).Find(query).Sort("-publishtime").Iter()
|
|
- nowTime := time.Now().Unix()//当前时间的时间戳
|
|
|
|
|
|
+ nowTime := time.Now().Unix() //当前时间的时间戳
|
|
n, continuSum := 0, 0
|
|
n, continuSum := 0, 0
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
for tmp := make(map[string]interface{}); it.Next(&tmp); n++ {
|
|
|
|
|
|
@@ -149,14 +149,13 @@ func NewDatamap(days int, lastid string) *datamap {
|
|
//if util.IntAll((*source)["sourcewebsite"]) == 1 {
|
|
//if util.IntAll((*source)["sourcewebsite"]) == 1 {
|
|
// continue
|
|
// continue
|
|
//}
|
|
//}
|
|
-
|
|
|
|
- if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1{
|
|
|
|
|
|
+ if qutil.IntAll(tmp["repeat"]) == 1 || qutil.IntAll(tmp["repeat"]) == -1 {
|
|
|
|
|
|
} else {
|
|
} else {
|
|
- if fmt.Sprint(reflect.TypeOf(tmp["publishtime"]))=="string" {
|
|
|
|
|
|
+ if fmt.Sprint(reflect.TypeOf(tmp["publishtime"])) == "string" {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
- pt:= tmp["publishtime"]
|
|
|
|
|
|
+ pt := tmp["publishtime"]
|
|
pt_time := qutil.Int64All(pt)
|
|
pt_time := qutil.Int64All(pt)
|
|
if pt_time > time.Now().Unix() {
|
|
if pt_time > time.Now().Unix() {
|
|
continue
|
|
continue
|
|
@@ -174,15 +173,15 @@ func NewDatamap(days int, lastid string) *datamap {
|
|
dm.data[k] = data
|
|
dm.data[k] = data
|
|
dm.keys[dkey] = true
|
|
dm.keys[dkey] = true
|
|
//添加省
|
|
//添加省
|
|
- isAreaExist :=false
|
|
|
|
- for _,v:= range dm.areakeys {
|
|
|
|
- if v==info.area {
|
|
|
|
|
|
+ isAreaExist := false
|
|
|
|
+ for _, v := range dm.areakeys {
|
|
|
|
+ if v == info.area {
|
|
isAreaExist = true
|
|
isAreaExist = true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if !isAreaExist {
|
|
if !isAreaExist {
|
|
areaArr := dm.areakeys
|
|
areaArr := dm.areakeys
|
|
- areaArr = append(areaArr,info.area)
|
|
|
|
|
|
+ areaArr = append(areaArr, info.area)
|
|
dm.areakeys = areaArr
|
|
dm.areakeys = areaArr
|
|
}
|
|
}
|
|
} else {
|
|
} else {
|
|
@@ -190,19 +189,19 @@ func NewDatamap(days int, lastid string) *datamap {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if n%10000 == 0 {
|
|
if n%10000 == 0 {
|
|
- log.Println("当前 n:", n,"数量:" ,continuSum,tmp["_id"])
|
|
|
|
|
|
+ log.Println("当前 n:", n, "数量:", continuSum, tmp["_id"])
|
|
}
|
|
}
|
|
tmp = make(map[string]interface{})
|
|
tmp = make(map[string]interface{})
|
|
}
|
|
}
|
|
- log.Println("load data:", n,"总数:",continuSum)
|
|
|
|
|
|
+ log.Println("load data:", n, "总数:", continuSum)
|
|
return dm
|
|
return dm
|
|
}
|
|
}
|
|
|
|
|
|
//数据构建
|
|
//数据构建
|
|
func NewInfo(tmp map[string]interface{}) *Info {
|
|
func NewInfo(tmp map[string]interface{}) *Info {
|
|
subtype := qutil.ObjToString(tmp["subtype"])
|
|
subtype := qutil.ObjToString(tmp["subtype"])
|
|
- if subtype=="招标"||subtype=="邀标"||subtype=="询价"||
|
|
|
|
- subtype=="竞谈"||subtype=="竞价" {
|
|
|
|
|
|
+ if subtype == "招标" || subtype == "邀标" || subtype == "询价" ||
|
|
|
|
+ subtype == "竞谈" || subtype == "竞价" {
|
|
subtype = "招标"
|
|
subtype = "招标"
|
|
}
|
|
}
|
|
|
|
|
|
@@ -233,16 +232,17 @@ func NewInfo(tmp map[string]interface{}) *Info {
|
|
info.href = qutil.ObjToString(tmp["href"])
|
|
info.href = qutil.ObjToString(tmp["href"])
|
|
info.repeatid = qutil.ObjToString(tmp["repeatid"])
|
|
info.repeatid = qutil.ObjToString(tmp["repeatid"])
|
|
info.specialWord = FilterRegTitle.MatchString(info.title)
|
|
info.specialWord = FilterRegTitle.MatchString(info.title)
|
|
- info.titleSpecialWord = FilterRegTitle_0.MatchString(info.title) ||FilterRegTitle_1.MatchString(info.title) || FilterRegTitle_2.MatchString(info.title)
|
|
|
|
- info.mergemap = *qutil.ObjToMap(tmp["merge"])
|
|
|
|
- if info.mergemap == nil {
|
|
|
|
- info.mergemap = make(map[string]interface{}, 0)
|
|
|
|
|
|
+ info.titleSpecialWord = FilterRegTitle_0.MatchString(info.title) || FilterRegTitle_1.MatchString(info.title) || FilterRegTitle_2.MatchString(info.title)
|
|
|
|
+
|
|
|
|
+ //加载repeat_ids数据
|
|
|
|
+ repeat_ids := []string{}
|
|
|
|
+ if ids_1, ok := tmp["repeat_ids"].([]interface{}); ok {
|
|
|
|
+ repeat_ids = qutil.ObjArrToStringArr(ids_1)
|
|
}
|
|
}
|
|
- if info.repeat_ids == nil {
|
|
|
|
- info.repeat_ids = make([]string, 0)
|
|
|
|
|
|
+ if ids_2, ok := tmp["repeat_ids"].(primitive.A); ok {
|
|
|
|
+ repeat_ids = qutil.ObjArrToStringArr(ids_2)
|
|
}
|
|
}
|
|
-
|
|
|
|
-
|
|
|
|
|
|
+ info.repeat_ids = repeat_ids
|
|
|
|
|
|
info.is_site = false
|
|
info.is_site = false
|
|
|
|
|
|
@@ -258,11 +258,11 @@ func (d *datamap) check(info *Info) (b bool, source *Info, reasons string) {
|
|
keys := []string{}
|
|
keys := []string{}
|
|
d.lock.Lock()
|
|
d.lock.Lock()
|
|
for k, _ := range d.keys { //不同时间段
|
|
for k, _ := range d.keys { //不同时间段
|
|
- if info.area=="全国" {//匹配所有省
|
|
|
|
- for _,v := range d.areakeys{
|
|
|
|
|
|
+ if info.area == "全国" { //匹配所有省
|
|
|
|
+ for _, v := range d.areakeys {
|
|
keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, v))
|
|
keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, v))
|
|
}
|
|
}
|
|
- }else {//匹配指定省
|
|
|
|
|
|
+ } else { //匹配指定省
|
|
keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
|
|
keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, info.area))
|
|
}
|
|
}
|
|
keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
|
|
keys = append(keys, fmt.Sprintf("%s_%s_%s", k, info.subtype, "全国"))
|
|
@@ -281,21 +281,21 @@ L:
|
|
return false, v, ""
|
|
return false, v, ""
|
|
}
|
|
}
|
|
//buyer 优先级高,有值且不相等过滤
|
|
//buyer 优先级高,有值且不相等过滤
|
|
- if info.buyer!=""&&v.buyer!=""&&info.buyer!=v.buyer {
|
|
|
|
|
|
+ if info.buyer != "" && v.buyer != "" && info.buyer != v.buyer {
|
|
if v.title != info.title && v.title != "" && info.title != "" {
|
|
if v.title != info.title && v.title != "" && info.title != "" {
|
|
isTestLog = true
|
|
isTestLog = true
|
|
}
|
|
}
|
|
- if buyerIsContinue(v,info) {
|
|
|
|
|
|
+ if buyerIsContinue(v, info) {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- if info.site != "" {//站点临时赋值
|
|
|
|
|
|
+ if info.site != "" { //站点临时赋值
|
|
sitelock.Lock()
|
|
sitelock.Lock()
|
|
dict := SiteMap[info.site]
|
|
dict := SiteMap[info.site]
|
|
sitelock.Unlock()
|
|
sitelock.Unlock()
|
|
if dict != nil {
|
|
if dict != nil {
|
|
- if (info.area == "全国" && dict["area"] != "")||
|
|
|
|
- (info.city == "" && dict["city"] != ""){
|
|
|
|
|
|
+ if (info.area == "全国" && dict["area"] != "") ||
|
|
|
|
+ (info.city == "" && dict["city"] != "") {
|
|
info.is_site = true
|
|
info.is_site = true
|
|
info.area = qutil.ObjToString(dict["area"])
|
|
info.area = qutil.ObjToString(dict["area"])
|
|
info.city = qutil.ObjToString(dict["city"])
|
|
info.city = qutil.ObjToString(dict["city"])
|
|
@@ -304,7 +304,7 @@ L:
|
|
}
|
|
}
|
|
|
|
|
|
//前置条件-五要素均相等
|
|
//前置条件-五要素均相等
|
|
- if leadingElementSame(v,info) {
|
|
|
|
|
|
+ if leadingElementSame(v, info) {
|
|
reason = "五要素-相同-满足"
|
|
reason = "五要素-相同-满足"
|
|
b = true
|
|
b = true
|
|
source = v
|
|
source = v
|
|
@@ -322,64 +322,62 @@ L:
|
|
break L
|
|
break L
|
|
}
|
|
}
|
|
//相同发布时间-标题无包含关系 - 项目名称不等
|
|
//相同发布时间-标题无包含关系 - 项目名称不等
|
|
- if isTheSameDay(info.publishtime,v.publishtime) {
|
|
|
|
- if !isTheSimilarName(info.title,v.title){
|
|
|
|
|
|
+ if isTheSameDay(info.publishtime, v.publishtime) {
|
|
|
|
+ if !isTheSimilarName(info.title, v.title) {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
}
|
|
//
|
|
//
|
|
|
|
|
|
-
|
|
|
|
-
|
|
|
|
//不同href
|
|
//不同href
|
|
if info.href != "" && info.href != v.href {
|
|
if info.href != "" && info.href != v.href {
|
|
- if v.title==info.title{
|
|
|
|
- if !againRepeat(v, info,true) {//进行同站点二次判断
|
|
|
|
|
|
+ if v.title == info.title {
|
|
|
|
+ if !againRepeat(v, info, true) { //进行同站点二次判断
|
|
reason = "同站点-href不同-标题相同等"
|
|
reason = "同站点-href不同-标题相同等"
|
|
b = true
|
|
b = true
|
|
source = v
|
|
source = v
|
|
reasons = reason
|
|
reasons = reason
|
|
break L
|
|
break L
|
|
- }else {
|
|
|
|
|
|
+ } else {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
- }else {
|
|
|
|
- if againRepeat(v, info,true) {
|
|
|
|
|
|
+ } else {
|
|
|
|
+ if againRepeat(v, info, true) {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
//特殊词处理
|
|
//特殊词处理
|
|
- specialNum:= dealWithSpecialWordNumber(info,v)
|
|
|
|
|
|
+ specialNum := dealWithSpecialWordNumber(info, v)
|
|
//前置条件 - 标题相关,有且一个关键词
|
|
//前置条件 - 标题相关,有且一个关键词
|
|
- if specialNum==1 {
|
|
|
|
- if againRepeat(v, info,false) {
|
|
|
|
|
|
+ if specialNum == 1 {
|
|
|
|
+ if againRepeat(v, info, false) {
|
|
continue
|
|
continue
|
|
}
|
|
}
|
|
}
|
|
}
|
|
//前置条件3 - 标题相关,均含有关键词
|
|
//前置条件3 - 标题相关,均含有关键词
|
|
- if specialNum==2 {
|
|
|
|
|
|
+ if specialNum == 2 {
|
|
if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
|
|
if len([]rune(v.title)) > 10 && len([]rune(info.title)) > 10 &&
|
|
v.title != "" && info.title != "" {
|
|
v.title != "" && info.title != "" {
|
|
- letter1,letter2:=v.title,info.title
|
|
|
|
- res, _ := regexp.Compile("[0-9a-zA-Z]+");
|
|
|
|
- if res.MatchString(letter1)||res.MatchString(letter2) {
|
|
|
|
- letter1=convertArabicNumeralsAndLetters(letter1)
|
|
|
|
- letter2=convertArabicNumeralsAndLetters(letter2)
|
|
|
|
|
|
+ letter1, letter2 := v.title, info.title
|
|
|
|
+ res, _ := regexp.Compile("[0-9a-zA-Z]+")
|
|
|
|
+ if res.MatchString(letter1) || res.MatchString(letter2) {
|
|
|
|
+ letter1 = convertArabicNumeralsAndLetters(letter1)
|
|
|
|
+ letter2 = convertArabicNumeralsAndLetters(letter2)
|
|
}
|
|
}
|
|
- if strings.Contains(letter1,"重新招标")|| strings.Contains(letter2,"重新招标"){
|
|
|
|
- letter1,letter2=dealWithSpecialPhrases(letter1,letter2)
|
|
|
|
|
|
+ if strings.Contains(letter1, "重新招标") || strings.Contains(letter2, "重新招标") {
|
|
|
|
+ letter1, letter2 = dealWithSpecialPhrases(letter1, letter2)
|
|
}
|
|
}
|
|
- if letter1==letter2 {
|
|
|
|
|
|
+ if letter1 == letter2 {
|
|
reason = reason + "标题关键词相等关系"
|
|
reason = reason + "标题关键词相等关系"
|
|
- if !againRepeat(v, info,false) {//进行二级金额判断
|
|
|
|
|
|
+ if !againRepeat(v, info, false) { //进行二级金额判断
|
|
b = true
|
|
b = true
|
|
source = v
|
|
source = v
|
|
reasons = reason
|
|
reasons = reason
|
|
break L
|
|
break L
|
|
}
|
|
}
|
|
- }else {
|
|
|
|
|
|
+ } else {
|
|
if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
|
|
if !(strings.Contains(letter1, letter2) || strings.Contains(letter2, letter1)) {
|
|
//无包含关系-即不相等
|
|
//无包含关系-即不相等
|
|
if againContainSpecialWord(v, info) {
|
|
if againContainSpecialWord(v, info) {
|
|
@@ -390,7 +388,6 @@ L:
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
-
|
|
|
|
//新增快速数据过少判重
|
|
//新增快速数据过少判重
|
|
if LowHeavy {
|
|
if LowHeavy {
|
|
repeat := false
|
|
repeat := false
|
|
@@ -459,15 +456,15 @@ L:
|
|
}
|
|
}
|
|
|
|
|
|
//添加省
|
|
//添加省
|
|
- isAreaExist :=false
|
|
|
|
- for _,v:= range d.areakeys {
|
|
|
|
- if v==info.area {
|
|
|
|
|
|
+ isAreaExist := false
|
|
|
|
+ for _, v := range d.areakeys {
|
|
|
|
+ if v == info.area {
|
|
isAreaExist = true
|
|
isAreaExist = true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if !isAreaExist {
|
|
if !isAreaExist {
|
|
areaArr := d.areakeys
|
|
areaArr := d.areakeys
|
|
- areaArr = append(areaArr,info.area)
|
|
|
|
|
|
+ areaArr = append(areaArr, info.area)
|
|
d.areakeys = areaArr
|
|
d.areakeys = areaArr
|
|
}
|
|
}
|
|
|
|
|
|
@@ -475,7 +472,7 @@ L:
|
|
}
|
|
}
|
|
|
|
|
|
if isTestLog {
|
|
if isTestLog {
|
|
- reasons = reasons+"-新修改"
|
|
|
|
|
|
+ reasons = reasons + "-新修改"
|
|
}
|
|
}
|
|
return
|
|
return
|
|
}
|
|
}
|
|
@@ -484,10 +481,10 @@ func (d *datamap) update(t int64) {
|
|
|
|
|
|
if TimingTask {
|
|
if TimingTask {
|
|
|
|
|
|
- }else {
|
|
|
|
|
|
+ } else {
|
|
if IsFull {
|
|
if IsFull {
|
|
- d.keymap = d.GetLatelyFiveDay(t)//全量
|
|
|
|
- }else {
|
|
|
|
|
|
+ d.keymap = d.GetLatelyFiveDay(t) //全量
|
|
|
|
+ } else {
|
|
d.keymap = d.GetLatelyFiveDayDouble(t) //增量
|
|
d.keymap = d.GetLatelyFiveDayDouble(t) //增量
|
|
}
|
|
}
|
|
m := map[string]bool{}
|
|
m := map[string]bool{}
|
|
@@ -508,7 +505,7 @@ func (d *datamap) update(t int64) {
|
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
-func (d *datamap) GetLatelyFiveDay(t int64) []string {
|
|
|
|
|
|
+func (d *datamap) GetLatelyFiveDay(t int64) []string {
|
|
array := make([]string, d.days)
|
|
array := make([]string, d.days)
|
|
now := time.Unix(t, 0)
|
|
now := time.Unix(t, 0)
|
|
for i := 0; i < d.days; i++ {
|
|
for i := 0; i < d.days; i++ {
|
|
@@ -518,7 +515,7 @@ func (d *datamap) GetLatelyFiveDay(t int64) []string {
|
|
return array
|
|
return array
|
|
}
|
|
}
|
|
|
|
|
|
-func (d *datamap) GetLatelyFiveDayDouble(t int64) []string {//增量-两倍
|
|
|
|
|
|
+func (d *datamap) GetLatelyFiveDayDouble(t int64) []string { //增量-两倍
|
|
array := make([]string, d.days*2)
|
|
array := make([]string, d.days*2)
|
|
now := time.Now()
|
|
now := time.Now()
|
|
for i := 0; i < d.days*2; i++ {
|
|
for i := 0; i < d.days*2; i++ {
|
|
@@ -528,8 +525,6 @@ func (d *datamap) GetLatelyFiveDayDouble(t int64) []string {//增量-两倍
|
|
return array
|
|
return array
|
|
}
|
|
}
|
|
|
|
|
|
-
|
|
|
|
-
|
|
|
|
//替换原始数据池-更新
|
|
//替换原始数据池-更新
|
|
func (d *datamap) replacePoolData(newData *Info) {
|
|
func (d *datamap) replacePoolData(newData *Info) {
|
|
d.lock.Lock()
|
|
d.lock.Lock()
|
|
@@ -538,7 +533,7 @@ func (d *datamap) replacePoolData(newData *Info) {
|
|
k := fmt.Sprintf("%s_%s_%s", dkey, newData.subtype, newData.area)
|
|
k := fmt.Sprintf("%s_%s_%s", dkey, newData.subtype, newData.area)
|
|
data := d.data[k]
|
|
data := d.data[k]
|
|
for k, v := range data {
|
|
for k, v := range data {
|
|
- if v.id == newData.id {//替换
|
|
|
|
|
|
+ if v.id == newData.id { //替换
|
|
data[k] = newData
|
|
data[k] = newData
|
|
break
|
|
break
|
|
}
|
|
}
|
|
@@ -547,16 +542,6 @@ func (d *datamap) replacePoolData(newData *Info) {
|
|
d.lock.Unlock()
|
|
d.lock.Unlock()
|
|
}
|
|
}
|
|
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
//相互替换数据池-暂时弃用
|
|
//相互替换数据池-暂时弃用
|
|
func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
//删除数据池的老数据
|
|
//删除数据池的老数据
|
|
@@ -565,7 +550,7 @@ func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
k_old := fmt.Sprintf("%s_%s_%s", dkey_old, oldData.subtype, oldData.area)
|
|
k_old := fmt.Sprintf("%s_%s_%s", dkey_old, oldData.subtype, oldData.area)
|
|
data_old := d.data[k_old]
|
|
data_old := d.data[k_old]
|
|
for k, v := range data_old {
|
|
for k, v := range data_old {
|
|
- if v.id == oldData.id {//删除对应当前的老数据
|
|
|
|
|
|
+ if v.id == oldData.id { //删除对应当前的老数据
|
|
data_old = append(data_old[:k], data_old[k+1:]...)
|
|
data_old = append(data_old[:k], data_old[k+1:]...)
|
|
break
|
|
break
|
|
}
|
|
}
|
|
@@ -590,33 +575,26 @@ func (d *datamap) replaceSourceData(newData *Info, oldData *Info) {
|
|
d.data[k] = data
|
|
d.data[k] = data
|
|
}
|
|
}
|
|
//添加省
|
|
//添加省
|
|
- isAreaExist :=false
|
|
|
|
- for _,v:= range d.areakeys {
|
|
|
|
- if v==newData.area {
|
|
|
|
|
|
+ isAreaExist := false
|
|
|
|
+ for _, v := range d.areakeys {
|
|
|
|
+ if v == newData.area {
|
|
isAreaExist = true
|
|
isAreaExist = true
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if !isAreaExist {
|
|
if !isAreaExist {
|
|
areaArr := d.areakeys
|
|
areaArr := d.areakeys
|
|
- areaArr = append(areaArr,newData.area)
|
|
|
|
|
|
+ areaArr = append(areaArr, newData.area)
|
|
d.areakeys = areaArr
|
|
d.areakeys = areaArr
|
|
}
|
|
}
|
|
|
|
|
|
d.lock.Unlock()
|
|
d.lock.Unlock()
|
|
}
|
|
}
|
|
|
|
+
|
|
//总计条数-暂时弃用
|
|
//总计条数-暂时弃用
|
|
func (d *datamap) currentTotalCount() int {
|
|
func (d *datamap) currentTotalCount() int {
|
|
- num:=qutil.IntAll(0)
|
|
|
|
- for _,v:=range d.data {
|
|
|
|
- num = num+qutil.IntAll(len(v))
|
|
|
|
|
|
+ num := qutil.IntAll(0)
|
|
|
|
+ for _, v := range d.data {
|
|
|
|
+ num = num + qutil.IntAll(len(v))
|
|
}
|
|
}
|
|
return num
|
|
return num
|
|
}
|
|
}
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|