|
@@ -13,44 +13,48 @@ import (
|
|
|
// reg_space matches a run of one or more space characters, or a run of one or
// more U+00A0 (no-break space, rune 160) characters.
// NOTE(review): the first two alternatives read as identical here; one of them
// may originally have been a different whitespace character (e.g. a tab or a
// full-width space) — confirm against the repository copy.
var reg_space = regexp.MustCompile("( +)|( +)|(" + string(rune(160)) + "+)")
|
|
|
// reg_han matches a single character in the range U+4E00–U+9FA5 (CJK unified
// ideographs). Code points above U+9FA5 are outside the class.
var reg_han = regexp.MustCompile("[\u4e00-\u9fa5]")
|
|
|
|
|
|
-//var domainReg = regexp.MustCompile(`(?://).+?(?:[::/])`)
|
|
|
-//var domainReg1 = regexp.MustCompile(`(?://)(\w+[-\.]{0,})+`)
|
|
|
+// var domainReg = regexp.MustCompile(`(?://).+?(?:[::/])`)
|
|
|
+// var domainReg1 = regexp.MustCompile(`(?://)(\w+[-\.]{0,})+`)
|
|
|
// domainReg matches a host-like string introduced by either an http/https
// scheme (scheme, one character from the class [::], "//", optional "www.")
// or a bare "www." / "WWW." prefix. The prefix is followed by one or more
// groups of word characters, each optionally trailed by hyphens or dots.
// NOTE(review): the class [::] appears to list ':' twice; one entry may
// originally have been a full-width colon — confirm against the repository.
var domainReg = regexp.MustCompile(`((http|https)[::]//(www\.)?|www\.|WWW\.)(\w+[-\.]{0,})+`)
|
|
|
// domainClearReg matches one or more consecutive "http://" / "https://"
// scheme prefixes (for example "http://https://").
// NOTE(review): presumably used to strip the scheme from a domainReg match —
// confirm at the call sites.
var domainClearReg = regexp.MustCompile(`((http|https)[::]//)+`)
|
|
|
// htmlModelReg matches a double-brace template placeholder such as
// {{item.title}}: "{{", then at least five characters drawn from ASCII
// letters, ASCII digits, '_', '.', '(', ')', ',' and ':', then "}}".
//
// The character class previously used the range A-z, which also admitted
// '[', '\', ']', '^' and '`'. It now spells out A-Z and keeps '_' explicitly.
// The literal braces are escaped so they cannot be read as repetition syntax.
var htmlModelReg = regexp.MustCompile(`\{\{[a-zA-Z_.()\d,:]{5,}\}\}`)
|
|
|
// siteReg matches either of the literal phrases 政府采购 ("government
// procurement") or 公共资源 ("public resources").
var siteReg = regexp.MustCompile(`(政府采购|公共资源)`)
|
|
|
-var saveothernum = 0
|
|
|
-var saveyqnum = 0
|
|
|
-var savenum = 0
|
|
|
-var savefilenum = 0
|
|
|
-var tmpsavenum = 0
|
|
|
-var updatenum = 0
|
|
|
-var errnum = 0
|
|
|
-var saveotherlock *sync.Mutex = new(sync.Mutex) //保存临时锁
|
|
|
-var saveyqlock *sync.Mutex = new(sync.Mutex) //保存临时锁
|
|
|
-var savelock *sync.Mutex = new(sync.Mutex) //保存锁
|
|
|
-var savefilelock *sync.Mutex = new(sync.Mutex) //保存附件信息锁
|
|
|
-var updatelock *sync.Mutex = new(sync.Mutex) //更新锁
|
|
|
-//var errorlock *sync.Mutex = new(sync.Mutex) //异常数据锁
|
|
|
-var SaveOtherCache = []map[string]interface{}{} //批量保存临时
|
|
|
-var SaveYqCache = []map[string]interface{}{} //批量保存临时
|
|
|
-var SaveCache = []map[string]interface{}{} //批量保存
|
|
|
-var SaveFileCache = []map[string]interface{}{} //批量保存附件信息
|
|
|
-var UpdateCache = [][]map[string]interface{}{} //批量更新
|
|
|
-//var ErrorCache = []map[string]interface{}{} //异常数据集
|
|
|
-var SaveOtherLastTime = time.Now().Unix()
|
|
|
-var SaveYqLastTime = time.Now().Unix()
|
|
|
-var SaveLastTime = time.Now().Unix()
|
|
|
-var SaveFileLastTime = time.Now().Unix()
|
|
|
-var UpdateLastTime = time.Now().Unix()
|
|
|
-var ErrorLastTime = time.Now().Unix()
|
|
|
-var SaveColl = "bidding_nomal"
|
|
|
-var SaveFileColl = "bidding_file"
|
|
|
-var ErrColl = "spider_warn"
|
|
|
-var SaveOtherColl = "bidding_other" //临时存储不用的数据
|
|
|
-var SaveYqColl = "bidding_yq" //舆情信息存储表
|
|
|
+var (
|


+ tmpsavenum = 0 // data-volume monitoring marker
|


+ StopFlag int // pause flag
|


+ // "other" data
|


+ SaveOtherColl = "bidding_other" // temporary storage for data that is not used
|


+ saveothernum = 0
|


+ saveotherlock *sync.Mutex = new(sync.Mutex) // lock for temporary saves
|


+ SaveOtherCache = []map[string]interface{}{} // batch of temporary documents to save
|


+ SaveOtherLastTime = time.Now().Unix()
|


+ // public-opinion (yq) data
|


+ SaveYqColl = "bidding_yq" // collection storing public-opinion information
|


+ saveyqnum = 0
|


+ saveyqlock *sync.Mutex = new(sync.Mutex) // lock for temporary saves (wording as in the original comment)
|


+ SaveYqCache = []map[string]interface{}{} // batch of temporary documents to save (wording as in the original comment)
|


+ SaveYqLastTime = time.Now().Unix()
|


+ // bidding notices without attachments
|


+ SaveColl = "bidding_nomal"
|


+ savenum = 0
|


+ savelock *sync.Mutex = new(sync.Mutex) // save lock
|


+ SaveCache = []map[string]interface{}{} // batch of documents to save
|


+ SaveLastTime = time.Now().Unix()
|


+ // bidding notices with attachments
|


+ SaveFileColl = "bidding_file"
|


+ savefilenum = 0
|


+ savefilelock *sync.Mutex = new(sync.Mutex) // lock for saving attachment information
|


+ SaveFileCache = []map[string]interface{}{} // batch of attachment-information documents to save
|


+ SaveFileLastTime = time.Now().Unix()
|


+ // miscellaneous
|


+ ErrColl = "spider_warn"
|


+ updatenum = 0
|


+ updatelock *sync.Mutex = new(sync.Mutex) // update lock
|


+ UpdateCache = [][]map[string]interface{}{} // batch of updates
|


+ UpdateLastTime = time.Now().Unix()
|


+)
|
|
|
|
|
|
-//批量保存舆情数据
|
|
|
+// 批量保存舆情数据
|
|
|
func saveYqMust() {
|
|
|
saveyqnum += len(SaveYqCache)
|
|
|
tools.Mgo.SaveBulk(SaveYqColl, SaveYqCache...)
|
|
@@ -60,7 +64,7 @@ func saveYqMust() {
|
|
|
SaveYqLastTime = time.Now().Unix()
|
|
|
}
|
|
|
|
|
|
-//批量保存临时数据
|
|
|
+// 批量保存临时数据
|
|
|
func saveOtherMust() {
|
|
|
saveothernum += len(SaveOtherCache)
|
|
|
tools.Mgo.SaveBulk(SaveOtherColl, SaveOtherCache...)
|
|
@@ -70,7 +74,7 @@ func saveOtherMust() {
|
|
|
SaveOtherLastTime = time.Now().Unix()
|
|
|
}
|
|
|
|
|
|
-//批量保存
|
|
|
+// 批量保存
|
|
|
func saveMust() {
|
|
|
//qutil.Debug("---------批量保存--------")
|
|
|
savenum += len(SaveCache)
|
|
@@ -81,7 +85,7 @@ func saveMust() {
|
|
|
SaveLastTime = time.Now().Unix()
|
|
|
}
|
|
|
|
|
|
-//批量保存附件信息
|
|
|
+// 批量保存附件信息
|
|
|
func saveFileMust() {
|
|
|
savefilenum += len(SaveFileCache)
|
|
|
tools.Mgo.SaveBulk(SaveFileColl, SaveFileCache...)
|
|
@@ -91,7 +95,7 @@ func saveFileMust() {
|
|
|
SaveFileLastTime = time.Now().Unix()
|
|
|
}
|
|
|
|
|
|
-//批量更新
|
|
|
+// 批量更新
|
|
|
func updateMust() {
|
|
|
//qutil.Debug("---------批量更新--------")
|
|
|
updatenum += len(UpdateCache)
|
|
@@ -113,7 +117,7 @@ func updateMust() {
|
|
|
// ErrorLastTime = time.Now().Unix()
|
|
|
// }
|
|
|
|
|
|
-//定时保存
|
|
|
+// 定时保存
|
|
|
func TimerSave() {
|
|
|
//批量保存bidding_yq
|
|
|
go func() {
|
|
@@ -204,10 +208,9 @@ func TimerSave() {
|
|
|
}()
|
|
|
}
|
|
|
|
|
|
-//实时处理招标信息
|
|
|
// LogLEVEL is a package-level boolean switch; it starts out false.
var LogLEVEL bool
|
|
|
|
|
|
-//通用保存服务
|
|
|
+// 通用保存服务
|
|
|
// msave maps a string key to a slice of documents (map[string]interface{}).
// It starts out empty but non-nil, so it can be written to directly.
// NOTE(review): presumably the key is the target collection name — confirm
// against the code that fills it.
var msave = make(map[string][]map[string]interface{})
|
|
|
// mlock is a package-level mutex value, ready to use in its zero state.
// NOTE(review): presumably guards msave and mnum — confirm at the call sites.
var mlock sync.Mutex
|
|
|
// mnum is an int counter, initially zero, used to decide whether a save is needed.
var mnum int
|
|
@@ -264,7 +267,7 @@ func saveCommMust() {
|
|
|
mnum = 0
|
|
|
}
|
|
|
|
|
|
-//通用信息处理
|
|
|
+// 通用信息处理
|
|
|
func dealDocComm(tmp map[string]interface{}) map[string]interface{} {
|
|
|
defer qutil.Catch()
|
|
|
delete(tmp, "T")
|