|
@@ -95,11 +95,38 @@ func NewSaveBidding(tmp map[string]interface{}) (b bool, res int, mgoid, mgocoll
|
|
|
errorData(LEVEL_ERROR, true, "publishtime", "Field Value Is Null", href, title, &warn, tmp)
|
|
|
tmp["publishtime"] = nowTime
|
|
|
}
|
|
|
+ //附件校验
|
|
|
+ downloadFileOk := false //标记此条数据是否有上传oss成功的附件
|
|
|
+ fileUploadErrIsSave := false //避免一条数据出现多个异常附件,保存多条错误信息
|
|
|
+ fileSizeUrlErrIsSave := false
|
|
|
+ if projectinfo, ok := tmp["projectinfo"].(map[string]interface{}); ok {
|
|
|
+ if attachments, ok := projectinfo["attachments"].(map[string]interface{}); ok {
|
|
|
+ for _, data := range attachments {
|
|
|
+ if d, ok := data.(map[string]interface{}); ok {
|
|
|
+ org_url := qutil.ObjToString(d["org_url"])
|
|
|
+ fid := qutil.ObjToString(d["fid"])
|
|
|
+ size := qutil.ObjToString(d["size"])
|
|
|
+ if !fileUploadErrIsSave && org_url != "" && fid == "" { //附件下载上传问题
|
|
|
+ errorData(LEVEL_WARN, true, "projectinfo", "Attachment Upload Failed", href, title, &warn, tmp)
|
|
|
+ fileUploadErrIsSave = true
|
|
|
+ }
|
|
|
+ if !fileSizeUrlErrIsSave && (strings.HasSuffix(org_url, "javascript:void(0)") || size == "3.7 KB") {
|
|
|
+ errorData(LEVEL_WARN, true, "projectinfo", "File Size Or Url Error", href, title, &warn, tmp)
|
|
|
+ fileSizeUrlErrIsSave = true
|
|
|
+ }
|
|
|
+ if fid != "" { //附件上传成功
|
|
|
+ downloadFileOk = true
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
detail := qutil.ObjToString(tmp["detail"])
|
|
|
uuid := sp.Sha(detail) //拟建数据2022-04-26也改为detail判重
|
|
|
//数据判重
|
|
|
if tmp["repeat"] == nil { //判断repeat,为了异常数据重推时不进行redis判重
|
|
|
- b, res = dataRepeat(tmp, href, hashHref, uuid)
|
|
|
+ b, res = dataRepeat(tmp, href, hashHref, uuid, downloadFileOk)
|
|
|
if b && res == 4 {
|
|
|
return
|
|
|
}
|
|
@@ -129,34 +156,8 @@ func NewSaveBidding(tmp map[string]interface{}) (b bool, res int, mgoid, mgocoll
|
|
|
delete(tmp, "s_title") //删除s_title
|
|
|
}
|
|
|
iscompete, _ := tmp["iscompete"].(bool) //判断是否是2021-11-01之后的新爬虫,新爬虫iscompete不能为空
|
|
|
- //2.4附件校验
|
|
|
- downloadFileOk := false //标记此条数据是否有上传oss成功的附件
|
|
|
- fileUploadErrIsSave := false //避免一条数据出现多个异常附件,保存多条错误信息
|
|
|
- fileSizeUrlErrIsSave := false
|
|
|
- if projectinfo, ok := tmp["projectinfo"].(map[string]interface{}); ok {
|
|
|
- if attachments, ok := projectinfo["attachments"].(map[string]interface{}); ok {
|
|
|
- for _, data := range attachments {
|
|
|
- if d, ok := data.(map[string]interface{}); ok {
|
|
|
- org_url := qutil.ObjToString(d["org_url"])
|
|
|
- fid := qutil.ObjToString(d["fid"])
|
|
|
- size := qutil.ObjToString(d["size"])
|
|
|
- if !fileUploadErrIsSave && org_url != "" && fid == "" { //附件下载上传问题
|
|
|
- errorData(LEVEL_WARN, true, "projectinfo", "Attachment Upload Failed", href, title, &warn, tmp)
|
|
|
- fileUploadErrIsSave = true
|
|
|
- }
|
|
|
- if !fileSizeUrlErrIsSave && (strings.HasSuffix(org_url, "javascript:void(0)") || size == "3.7 KB") {
|
|
|
- errorData(LEVEL_WARN, true, "projectinfo", "File Size Or Url Error", href, title, &warn, tmp)
|
|
|
- fileSizeUrlErrIsSave = true
|
|
|
- }
|
|
|
- if fid != "" { //附件上传成功
|
|
|
- downloadFileOk = true
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
- //2.5校验mustfield(title,href,detail,projectname)
|
|
|
+ //2.4校验mustfield(title,href,detail,projectname)
|
|
|
code := qutil.ObjToString(tmp["spidercode"])
|
|
|
nocheck := strings.HasSuffix(code, "_nocheck")
|
|
|
for _, f := range mustfield {
|
|
@@ -271,7 +272,7 @@ func NewSaveBidding(tmp map[string]interface{}) (b bool, res int, mgoid, mgocoll
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- //2.6部分字段校验
|
|
|
+ //2.5部分字段校验
|
|
|
for _, cf := range checkfield {
|
|
|
if qutil.ObjToString(tmp[cf]) == "" {
|
|
|
errorData(LEVEL_ERROR, true, cf, "Field Value Is Null", href, title, &warn, tmp)
|
|
@@ -348,7 +349,7 @@ func NewSaveBidding(tmp map[string]interface{}) (b bool, res int, mgoid, mgocoll
|
|
|
}
|
|
|
|
|
|
// dataRepeat performs duplicate detection on the record in tmp.
|
|
|
-func dataRepeat(tmp map[string]interface{}, href, hashHref, uuid string) (b bool, res int) {
|
|
|
+func dataRepeat(tmp map[string]interface{}, href, hashHref, uuid string, downloadFileOk bool) (b bool, res int) {
|
|
|
detail := qutil.ObjToString(tmp["detail"])
|
|
|
filterDetail := sp.FilterDetail(detail) //只保留文本内容
|
|
|
//新版数据判重
|
|
@@ -402,7 +403,14 @@ func dataRepeat(tmp map[string]interface{}, href, hashHref, uuid string) (b bool
|
|
|
tmp["hashref"] = hashHref
|
|
|
SaveMgoCache <- tmp //记录重复数据
|
|
|
if repeatby == "bloom_detail" || repeatby == "sha_detail" { //被正文判重,保留数据,打上标记
|
|
|
- tmp["extracttype"] = -1
|
|
|
+ /*
|
|
|
+ 日期:2024-04-26
|
|
|
+ 逻辑:含有效附件的数据不进行正文判重
|
|
|
+ 提出者:张金坤
|
|
|
+ */
|
|
|
+ if !downloadFileOk { //含有效附件,不进行正文判重及extracttype != -1
|
|
|
+ tmp["extracttype"] = -1
|
|
|
+ }
|
|
|
return false, 4
|
|
|
}
|
|
|
return true, 4
|