|
@@ -17,10 +17,11 @@ import (
|
|
|
// Fixed suffix
|
|
|
//var Reg2 = regexp.MustCompile(`((http|https)[::]//(www.)?|www.|WWW.)([0-9A-Za-z_]+[-\.]{0,})+\.(cn|asia|hn|citic|ltd|tv|shop|com|mo|co|net|cnpc|CN|CC|cc|pro|aero|coop|hk|tw|me|rec|arts|store|firm|int|info|org|top|wang|ren|xyz|xin|pub|tech|ink|biz|red|gov|vip|art|edu)+`)
|
|
|
// Supports whitespace
|
|
|
-//var Reg1 = regexp.MustCompile("((http|https)[::]//(www.)?|www.|WWW.)([\\s\u3000\u2003\u00a0]{0,}[-A-Za-z0-9&@$??#/%=~_|.::,]+)+([\\s\u3000\u2003\u00a0]{0,}(com|cn|net))?[-A-Za-z0-9&@$??#/%=~_|.::,]+")
|
|
|
-var Reg1 = regexp.MustCompile(`((http|https)[::]//(www\.)?|www\.|WWW\.)[-A-Za-z0-9&@$??#/%=~_|.::,]+`)
|
|
|
-var Reg2 = regexp.MustCompile(`((http|https)[::]//(www\.)?|www\.|WWW\.)(\w+[-.]{0,})+`)
|
|
|
-var Clear1 = regexp.MustCompile(".*(cn|com|org|net|co|mo)((\\d)+[.]{0,}(\\d){0,})$")
|
|
|
// Reg1 matches URL-like text while tolerating embedded whitespace. The pattern is:
//   1. a prefix: "http" or "https", a colon, "//" and an optional "www.",
//      or a bare "www." / "WWW.";
//   2. one or more runs of URL characters, each run optionally preceded by
//      whitespace (\s, U+3000, U+2003 or U+00A0);
//   3. optionally, whitespace followed by "com", "cn" or "net";
//   4. at least one more URL character.
// It is compiled once at package scope; MustCompile panics at init if the pattern is invalid.
+var Reg1 = regexp.MustCompile("((http|https)[::]//(www\\.)?|www\\.|WWW\\.)([\\s\u3000\u2003\u00a0]{0,}[-A-Za-z0-9&@$??#/%=~_|.::,]+)+([\\s\u3000\u2003\u00a0]{0,}(com|cn|net))?[-A-Za-z0-9&@$??#/%=~_|.::,]+")
|
|
|
+
|
|
|
+//var Reg1 = regexp.MustCompile(`((http|https)[::]//(www\.)?|www\.|WWW\.)[-A-Za-z0-9&@$??#/%=~_|.::,]+`)
|
|
|
// Reg2 matches the same prefix as Reg1 ("http"/"https", a colon, "//" and an
// optional "www.", or a bare "www." / "WWW."), followed by one or more groups of
// word characters (\w+). Each group may be followed by any number of '-', '.',
// or whitespace characters (\s, U+3000, U+2003, U+00A0).
+var Reg2 = regexp.MustCompile("((http|https)[::]//(www\\.)?|www\\.|WWW\\.)(\\w+[-.\\s\u3000\u2003\u00a0]{0,})+")
|
|
|
// Clear1 matches a string whose end consists of one of the suffixes
// cn, com, org, net, co, mo, vn or en, immediately followed by one or more
// digits, then any number of dots, then any number of further digits.
// Capture group 1 is the suffix and group 2 is the trailing numeric tail.
// NOTE(review): presumably used to detect digits glued onto a domain suffix — confirm at the call site.
+var Clear1 = regexp.MustCompile(".*(cn|com|org|net|co|mo|vn|en)((\\d)+[.]{0,}(\\d){0,})$")
|
|
|
// RegSpace matches one or more consecutive whitespace characters: anything in
// the \s class plus U+3000 (ideographic space), U+2003 (em space) and
// U+00A0 (no-break space), which \s does not cover.
var RegSpace = regexp.MustCompile("[\\s\u3000\u2003\u00a0]+")
|
|
|
var Replace = map[string]string{
|
|
|
":": ":",
|
|
@@ -63,7 +64,7 @@ func StartTask() {
|
|
|
field := map[string]interface{}{"detail": 1}
|
|
|
logger.Debug("query:", q)
|
|
|
it := sess.DB("qfw").C("bidding").Find(q).Select(field).Sort("_id").Iter()
|
|
|
- count := Mgo.Count("bidding", q)
|
|
|
+ count := Mgo.Count("test", q)
|
|
|
fmt.Println("共加载数据", count)
|
|
|
sum := 0
|
|
|
wg := &sync.WaitGroup{}
|
|
@@ -169,6 +170,7 @@ func StartTask() {
|
|
|
// arr = [][]map[string]interface{}{}
|
|
|
// }
|
|
|
lock_dmn.Unlock()
|
|
|
+ fmt.Println("本轮任务结束")
|
|
|
}
|
|
|
|
|
|
// Load domain information
|