|
@@ -44,12 +44,6 @@ type Data struct {
|
|
// TimeTaskGetLua 定时取爬虫放入通道
|
|
// TimeTaskGetLua 定时取爬虫放入通道
|
|
func TimeTaskGetLua() {
|
|
func TimeTaskGetLua() {
|
|
query := map[string]interface{}{
|
|
query := map[string]interface{}{
|
|
- "event": map[string]interface{}{
|
|
|
|
- "$ne": 7410,
|
|
|
|
- },
|
|
|
|
- "incrementevent": map[string]interface{}{
|
|
|
|
- "$ne": 7410,
|
|
|
|
- },
|
|
|
|
"platform": "golua平台",
|
|
"platform": "golua平台",
|
|
"state": 1,
|
|
"state": 1,
|
|
"check": map[string]interface{}{
|
|
"check": map[string]interface{}{
|
|
@@ -273,19 +267,19 @@ func listResultCheck(pageNum int, lists map[int][]map[string]interface{}, err, w
|
|
warn["列表页翻页异常"] = "列表页可以翻页,最大页却为1"
|
|
warn["列表页翻页异常"] = "列表页可以翻页,最大页却为1"
|
|
}
|
|
}
|
|
var fileNum, downloadOkNum, fileFailedNum int //附件下载量,详情页下载量
|
|
var fileNum, downloadOkNum, fileFailedNum int //附件下载量,详情页下载量
|
|
- n := 0
|
|
|
|
wg := &sync.WaitGroup{}
|
|
wg := &sync.WaitGroup{}
|
|
lock := &sync.Mutex{}
|
|
lock := &sync.Mutex{}
|
|
ch := make(chan bool, 10)
|
|
ch := make(chan bool, 10)
|
|
- hrefs := make([][]string, 2) //记录前两页数据链接
|
|
|
|
|
|
+ hrefs := make([][]string, len(lists)) // one href slice per page, filled in map-iteration order (not guaranteed to be page order)
|
|
|
|
+ var i, num int
|
|
for _, list := range lists {
|
|
for _, list := range lists {
|
|
pageHrefs := []string{}
|
|
pageHrefs := []string{}
|
|
for _, l := range list {
|
|
for _, l := range list {
|
|
pageHrefs = append(pageHrefs, qu.ObjToString(l["href"]))
|
|
pageHrefs = append(pageHrefs, qu.ObjToString(l["href"]))
|
|
- if n > 50 {
|
|
|
|
|
|
+ if num > 50 {
|
|
break
|
|
break
|
|
}
|
|
}
|
|
- n++
|
|
|
|
|
|
+ num++
|
|
ch <- true
|
|
ch <- true
|
|
wg.Add(1)
|
|
wg.Add(1)
|
|
go func(tmp map[string]interface{}) {
|
|
go func(tmp map[string]interface{}) {
|
|
@@ -330,24 +324,32 @@ func listResultCheck(pageNum int, lists map[int][]map[string]interface{}, err, w
|
|
lock.Unlock()
|
|
lock.Unlock()
|
|
}(l)
|
|
}(l)
|
|
}
|
|
}
|
|
- hrefs = append(hrefs, pageHrefs)
|
|
|
|
|
|
+ hrefs[i] = pageHrefs
|
|
|
|
+ i++
|
|
}
|
|
}
|
|
wg.Wait()
|
|
wg.Wait()
|
|
- hrefByte1, _ := json.Marshal(hrefs[0])
|
|
|
|
- hrefByte2, _ := json.Marshal(hrefs[1])
|
|
|
|
- if sp.GetHashKey(hrefByte1) == sp.GetHashKey(hrefByte2) {
|
|
|
|
- err["前两页数据一致"] = "前两页数据一致"
|
|
|
|
|
|
+
|
|
|
|
+ if len(hrefs) >= 2 {
|
|
|
|
+ hrefByte1, _ := json.Marshal(hrefs[0])
|
|
|
|
+ hrefByte2, _ := json.Marshal(hrefs[1])
|
|
|
|
+ if sp.GetHashKey(hrefByte1) == sp.GetHashKey(hrefByte2) {
|
|
|
|
+ err["前两页数据一致"] = "前两页数据一致"
|
|
|
|
+ }
|
|
}
|
|
}
|
|
if fileNum == 0 {
|
|
if fileNum == 0 {
|
|
warn["未下载任何附件"] = "未下载任何附件"
|
|
warn["未下载任何附件"] = "未下载任何附件"
|
|
}
|
|
}
|
|
if fileFailedNum > 0 {
|
|
if fileFailedNum > 0 {
|
|
|
|
+ href := warn["附件未成功下载"]
|
|
|
|
+ delete(warn, "附件未成功下载")
|
|
rate := float64(fileFailedNum) / float64(fileNum)
|
|
rate := float64(fileFailedNum) / float64(fileNum)
|
|
- warn["部分附件未成功下载"] = "部分附件未成功下载,一共+" + fmt.Sprint(len(lists)) + "个,失败占比:" + fmt.Sprintf("%.1f", rate) + "(" + warn["附件未成功下载"] + ")"
|
|
|
|
|
|
+ warn["部分附件未成功下载"] = "部分附件未成功下载,一共" + fmt.Sprint(fileNum) + "个,失败占比:" + fmt.Sprintf("%.1f%%", rate*100) + "(" + href + ")"
|
|
}
|
|
}
|
|
- if n := len(lists) - downloadOkNum; n > 0 {
|
|
|
|
- rate := float64(n) / float64(len(lists))
|
|
|
|
- warn["部分详情页未下载成功"] = "部分详情页未成功下载,一共+" + fmt.Sprint(len(lists)) + "条,失败占比:" + fmt.Sprintf("%.1f", rate) + "(" + warn["详情页下载失败"] + ")"
|
|
|
|
|
|
+ if n := num - downloadOkNum; n > 0 {
|
|
|
|
+ href := warn["详情页下载失败"]
|
|
|
|
+ delete(warn, "详情页下载失败")
|
|
|
|
+ rate := float64(n) / float64(num)
|
|
|
|
+ warn["部分详情页未下载成功"] = "部分详情页未成功下载,一共" + fmt.Sprint(num) + "条,失败占比:" + fmt.Sprintf("%.1f%%", rate*100) + "(" + href + ")"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
@@ -360,7 +362,7 @@ func downloadDetail(fileNum, fileFailedNumTmp, downloadOkNum *int, err, warn map
|
|
param[k] = qu.ObjToString(v)
|
|
param[k] = qu.ObjToString(v)
|
|
}
|
|
}
|
|
var downloadDetailOk bool
|
|
var downloadDetailOk bool
|
|
- for i := 1; i <= 3; i++ { //重试三次
|
|
|
|
|
|
+ for i := 1; i <= 2; i++ { // at most two attempts
|
|
_, rep_err := s.DownloadDetailPage(param, data)
|
|
_, rep_err := s.DownloadDetailPage(param, data)
|
|
if rep_err == nil && len(data) > 0 {
|
|
if rep_err == nil && len(data) > 0 {
|
|
downloadDetailOk = true
|
|
downloadDetailOk = true
|
|
@@ -375,20 +377,21 @@ func downloadDetail(fileNum, fileFailedNumTmp, downloadOkNum *int, err, warn map
|
|
tmpMap := tmp.(map[string]interface{})
|
|
tmpMap := tmp.(map[string]interface{})
|
|
if qu.ObjToString(tmpMap["filename"]) == "附件中含有乱码" {
|
|
if qu.ObjToString(tmpMap["filename"]) == "附件中含有乱码" {
|
|
getFile = true
|
|
getFile = true
|
|
|
|
+ *fileNum++
|
|
warn["附件含乱码"] = "部分附件中含有乱码(" + qu.ObjToString(param["href"]) + ")"
|
|
warn["附件含乱码"] = "部分附件中含有乱码(" + qu.ObjToString(param["href"]) + ")"
|
|
} else if qu.ObjToString(tmpMap["org_url"]) != "" && qu.ObjToString(tmpMap["fid"]) == "" { //附件未下载成功
|
|
} else if qu.ObjToString(tmpMap["org_url"]) != "" && qu.ObjToString(tmpMap["fid"]) == "" { //附件未下载成功
|
|
warn["附件未成功下载"] = qu.ObjToString(param["href"])
|
|
warn["附件未成功下载"] = qu.ObjToString(param["href"])
|
|
getFile = true
|
|
getFile = true
|
|
|
|
+ *fileNum++
|
|
*fileFailedNumTmp++
|
|
*fileFailedNumTmp++
|
|
} else if qu.ObjToString(tmpMap["fid"]) != "" {
|
|
} else if qu.ObjToString(tmpMap["fid"]) != "" {
|
|
getFile = true
|
|
getFile = true
|
|
|
|
+ *fileNum++
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- if getFile {
|
|
|
|
- *fileNum++ //记录附件下载个数
|
|
|
|
- } else {
|
|
|
|
|
|
+ if !getFile {
|
|
contenthtml := qu.ObjToString(data["contenthtml"])
|
|
contenthtml := qu.ObjToString(data["contenthtml"])
|
|
//if !checkFile(contenthtml) {
|
|
//if !checkFile(contenthtml) {
|
|
// err["contenthtml中含有可下载内容"] = "contenthtml中含有可下载内容"
|
|
// err["contenthtml中含有可下载内容"] = "contenthtml中含有可下载内容"
|