|
@@ -537,10 +537,10 @@ func file2text(doc *map[string]interface{}) {
|
|
|
(*doc)["detailfile"] = strings.ReplaceAll(tmpstr, "附件", "")
|
|
|
}
|
|
|
|
|
|
-//抽取
|
|
|
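+// ExtractProcess extracts fields from the main-body job j; when jf is a non-nil attachment job (jf.IsFile), it is extracted too and its results are merged into j where j has none.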
|
|
|
func (e *ExtractTask) ExtractProcess(j, jf *ju.Job, isSite bool) {
|
|
|
- e.ExtractDetail(j, isSite, j.SpiderCode)
|
|
|
- if jf != nil && jf.IsFile {
|
|
|
+ e.ExtractDetail(j, isSite, j.SpiderCode) //正文-抽取属性
|
|
|
+ if jf != nil && jf.IsFile { //附件jf → j 合并
|
|
|
e.ExtractDetail(jf, isSite, j.SpiderCode)
|
|
|
for tmpk, xs := range jf.Result {
|
|
|
if len(j.Result[tmpk]) == 0 {
|
|
@@ -771,7 +771,7 @@ func (e *ExtractTask) ExtractDetail(j *ju.Job, isSite bool, codeSite string) {
|
|
|
lockclear.Unlock()
|
|
|
}
|
|
|
}
|
|
|
- PackageDetail(j, e, isSite, codeSite) //处理分包信息
|
|
|
+ PackageDetail(j, e, isSite, codeSite) //处理分包信息-去重
|
|
|
// bs, _ := json.Marshal(j.Result)
|
|
|
// log.Debug("抽取结果", j.Title, j.SourceMid, string(bs))
|
|
|
}, func(err interface{}) {
|
|
@@ -1886,6 +1886,7 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
}
|
|
|
//重新取出清理过后的中标候选人
|
|
|
resetWinnerorder(j)
|
|
|
+ //打分
|
|
|
doc, result, _id := funcAnalysis(j, e)
|
|
|
//_, result, _id := funcAnalysis(j, e)
|
|
|
if ju.IsSaveTag {
|
|
@@ -2403,6 +2404,7 @@ func delFiled(k string) bool {
|
|
|
return k == "detailfile" || k == "summary" || k == "detail" || k == "contenthtml" || k == "site" || k == "spidercode" || k == "projectinfo" || k == "jsondata"
|
|
|
}
|
|
|
|
|
|
+// funcAnalysis analyses the extraction results of j (scoring and sorting).
|
|
|
func funcAnalysis(j *ju.Job, e *ExtractTask) (*map[string]interface{}, map[string][]*ju.ExtField, string) {
|
|
|
defer qu.Catch()
|
|
|
doc := j.Data
|
|
@@ -2415,7 +2417,7 @@ func funcAnalysis(j *ju.Job, e *ExtractTask) (*map[string]interface{}, map[strin
|
|
|
ju.Sort(val)
|
|
|
}
|
|
|
if !(len(j.Result) <= 0 || j.Jsondata == nil || len(*j.Jsondata) <= 0) {
|
|
|
- //jsondata清理
|
|
|
+
|
|
|
clearJd(j.Jsondata, e, j.SpiderCode, j.IsClearnMoney)
|
|
|
marshalbt, _ := json.Marshal(j.Jsondata)
|
|
|
tmpjddata := make(map[string]interface{})
|