|
@@ -90,6 +90,7 @@ var Mgo *mgo.MongodbSim
|
|
|
// TimeChan is a bool channel with a buffer of one element.
// NOTE(review): its senders and receivers are outside this view — confirm intended use.
var TimeChan = make(chan bool, 1)
|
|
|
// Reg matches an http:// or https:// prefix followed by a dotted host name
// (one or more `\w+.` labels and a final `\w+` label) and an optional
// trailing slash. The pattern is unanchored, and because `\w` does not
// include '-', a host label containing a hyphen is not matched in full.
var Reg = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+(/?)`)
|
|
|
// DomainNameReg matches "//" followed by the shortest run of at least one
// character that ends at the first ':' or '/'. The match therefore includes
// the leading "//" and the trailing delimiter (e.g. "//host/" or "//host:").
// The empty group `(?:)` has no effect, and ':' is listed twice in the
// character class, which is redundant as written.
// NOTE(review): a URL with nothing after the host (no ':' or '/') yields no
// match, so FindString returns "" — confirm callers expect that.
var DomainNameReg = regexp.MustCompile(`(?://).+?(?:)[::/]`)
|
|
|
+var RepDomainNameReg = regexp.MustCompile(`[::/]+`) // matches runs of ':' and '/'; ThisSiteData replaces them with "" to strip the delimiters from a DomainNameReg match
|
|
|
|
|
|
// Heartbeat
|
|
|
func UpdateHeart(site, channel, code, user, t string) {
|
|
@@ -421,8 +422,8 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
|
|
|
func (s *Spider) ThisSiteData(tmp map[string]interface{}) {
|
|
|
defer qu.Catch()
|
|
|
href := qu.ObjToString(tmp["href"])
|
|
|
- url_dn := DomainNameReg.FindString(s.TargetChannelUrl)
|
|
|
- href_dn := DomainNameReg.FindString(href)
|
|
|
+ url_dn := RepDomainNameReg.ReplaceAllString(DomainNameReg.FindString(s.TargetChannelUrl), "")
|
|
|
+ href_dn := RepDomainNameReg.ReplaceAllString(DomainNameReg.FindString(href), "")
|
|
|
if url_dn != href_dn {
|
|
|
SaveMgoCache <- map[string]interface{}{
|
|
|
"site": s.Name,
|