maxiaoshan 3 年之前
父节点
当前提交
aacd62274b
共有 1 个文件被更改,包括 3 次插入和 2 次删除
  1. 3 2
      src/spider/spider.go

+ 3 - 2
src/spider/spider.go

@@ -90,6 +90,7 @@ var Mgo *mgo.MongodbSim
 var TimeChan = make(chan bool, 1)
 var Reg = regexp.MustCompile(`(http|https)://([\w]+\.)+[\w]+(/?)`)
 var DomainNameReg = regexp.MustCompile(`(?://).+?(?:)[::/]`)
+var RepDomainNameReg = regexp.MustCompile(`[::/]+`)
 
 //心跳
 func UpdateHeart(site, channel, code, user, t string) {
@@ -421,8 +422,8 @@ func (s *Spider) DownListPageItem() (errs interface{}) {
 func (s *Spider) ThisSiteData(tmp map[string]interface{}) {
 	defer qu.Catch()
 	href := qu.ObjToString(tmp["href"])
-	url_dn := DomainNameReg.FindString(s.TargetChannelUrl)
-	href_dn := DomainNameReg.FindString(href)
+	url_dn := RepDomainNameReg.ReplaceAllString(DomainNameReg.FindString(s.TargetChannelUrl), "")
+	href_dn := RepDomainNameReg.ReplaceAllString(DomainNameReg.FindString(href), "")
 	if url_dn != href_dn {
 		SaveMgoCache <- map[string]interface{}{
 			"site":       s.Name,