|
@@ -250,10 +250,16 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job) {
|
|
|
tmpDeatil := detail
|
|
|
tmpdocument, err := goquery.NewDocumentFromReader(strings.NewReader(tmpDeatil))
|
|
|
if err == nil {
|
|
|
- if utf8.RuneCountInString(strings.Trim(tmpdocument.Text(), " ")) < 50 {
|
|
|
+ conlen := utf8.RuneCountInString(strings.Trim(tmpdocument.Text(), " "))
|
|
|
+ if conlen < 50 {
|
|
|
if isextFile {
|
|
|
detail += qu.ObjToString(doc["detailfile"])
|
|
|
+ doc["detail"] = detail
|
|
|
}
|
|
|
+ } else if conlen > qu.IntAllDef(ju.Config["filelength"], 100000) {
|
|
|
+ //防止文本过长,造成抽取阻塞
|
|
|
+ log.Debug("文本太长", doc["_id"], conlen)
|
|
|
+ doc["detail"] = d3
|
|
|
}
|
|
|
}
|
|
|
toptype := qu.ObjToString(doc["toptype"])
|