|
@@ -383,34 +383,25 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
|
|
|
|
|
|
//遍历附件字段内容,拼接在一起;附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
|
|
//遍历附件字段内容,拼接在一起;附件文本堆一起(后期可以考虑,分开处理),方法里修改了doc["detailfile"]结果
|
|
func file2text(doc *map[string]interface{}) {
|
|
func file2text(doc *map[string]interface{}) {
|
|
- var strfileinfo bytes.Buffer
|
|
|
|
- if v, ok := (*doc)["projectinfo"].(map[string]interface{}); ok {
|
|
|
|
- if va, ok := v["attachments"].(map[string]interface{}); ok {
|
|
|
|
- for _, vaatt := range va {
|
|
|
|
- if fileinfo, ok := vaatt.(map[string]interface{}); ok {
|
|
|
|
- if qu.ObjToString(fileinfo["content"]) != "" {
|
|
|
|
- switch fileinfo["content"].(type) {
|
|
|
|
- case string:
|
|
|
|
- lock.Lock()
|
|
|
|
- strfileinfo.WriteString(fileinfo["content"].(string) + " \n")
|
|
|
|
- lock.Unlock()
|
|
|
|
- case []map[string]interface{}:
|
|
|
|
- for _, fv := range fileinfo["content"].([]map[string]interface{}) {
|
|
|
|
- if fv["context"] != nil {
|
|
|
|
- lock.Lock()
|
|
|
|
- strfileinfo.WriteString(fv["context"].(string) + " \n")
|
|
|
|
- lock.Unlock()
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
|
|
+ tmpstr := ""
|
|
|
|
+ if attach_text, ok := (*doc)["attach_text"].(map[string]interface{}); ok {
|
|
|
|
+ for _, attachs := range attach_text {
|
|
|
|
+ if fileinfos, ok := attachs.(map[string]interface{}); ok {
|
|
|
|
+ for _, fileinfo := range fileinfos {
|
|
|
|
+ if ff, ok := fileinfo.(map[string]interface{}); ok {
|
|
|
|
+ attach_url := qu.ObjToString(ff["attach_url"])
|
|
|
|
+ bs := ju.OssGetObject(attach_url)
|
|
|
|
+ if utf8.RuneCountInString(tmpstr+bs) < qu.IntAllDef(ju.Config["filelength"], 100000) {
|
|
|
|
+ tmpstr += bs + "\n"
|
|
|
|
+ } else {
|
|
|
|
+ break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- if utf8.RuneCountInString(strfileinfo.String()) < qu.IntAllDef(ju.Config["filelength"], 100000) {
|
|
|
|
- (*doc)["detailfile"] = strfileinfo.String() //附件文本堆一起(后期可以考虑,分开处理)
|
|
|
|
- }
|
|
|
|
|
|
+ (*doc)["detailfile"] = tmpstr
|
|
}
|
|
}
|
|
|
|
|
|
//抽取
|
|
//抽取
|