|
@@ -440,6 +440,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
|
|
|
RuleBlock: e.RuleBlock,
|
|
|
Dataging: qu.IntAll(doc["dataging"]),
|
|
|
IsClearnMoney: isClearnMoneystr,
|
|
|
+ IsUnRulesTab : false,
|
|
|
}
|
|
|
if isextFile {
|
|
|
jf = &ju.Job{
|
|
@@ -460,6 +461,7 @@ func (e *ExtractTask) PreInfo(doc map[string]interface{}) (j, jf *ju.Job, isSite
|
|
|
IsFile: isextFile,
|
|
|
Dataging: qu.IntAll(doc["dataging"]),
|
|
|
IsClearnMoney: isClearnMoneystr,
|
|
|
+ IsUnRulesTab : false,
|
|
|
}
|
|
|
}
|
|
|
codeSite := j.SpiderCode
|
|
@@ -1903,6 +1905,66 @@ var clearWinnerReg = regexp.MustCompile("名称|施工|拟定供应商名称|:
|
|
|
var unPackageWinnerReg = regexp.MustCompile("(重新招标)")
|
|
|
|
|
|
|
|
|
+//特殊金额-处理判断-倍率关系
|
|
|
+func calculateAbnormalMoney(val []*ju.ExtField) (bool,int) {
|
|
|
+ //金额结果只有两种 - 倍率关系10000 - 过10E
|
|
|
+ moneyIndex := []int{}
|
|
|
+ moneyArr := []float64{}
|
|
|
+ difValue := map[string]interface{}{}
|
|
|
+ for k, v := range val { //取第一个非负数,项目名称除外
|
|
|
+ if v.IsTrue && v.Score > -1 {
|
|
|
+ moneyArr = append(moneyArr,qu.Float64All(v.Value))
|
|
|
+ moneyIndex = append(moneyIndex,k)
|
|
|
+ key := ""
|
|
|
+ if m,ok := v.Value.(float64);ok {
|
|
|
+ key = fmt.Sprintf("%f",m)
|
|
|
+ }else {
|
|
|
+ key = qu.ObjToString(v.Value)
|
|
|
+ }
|
|
|
+ if difValue[key]==nil {
|
|
|
+ difValue[key] = 1
|
|
|
+ }
|
|
|
+ if len(difValue)>2 {
|
|
|
+ return false,0
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ //计算金额数组
|
|
|
+ if len(difValue)==2 {
|
|
|
+ money_1,money_2 := float64(0),float64(0)
|
|
|
+ for k,v := range moneyArr{
|
|
|
+ if k==0 {
|
|
|
+ money_1=v
|
|
|
+ }else {
|
|
|
+ if v!=money_1 {
|
|
|
+ money_2=v
|
|
|
+ break
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ isRatio,new_money:= false,float64(0) //判断金额是否为倍率关系
|
|
|
+ if money_1!=float64(0)&&money_2!=float64(0) {
|
|
|
+ if money_1 == money_2*float64(10000) && money_1>=1000000000{
|
|
|
+ isRatio = true
|
|
|
+ new_money = money_2
|
|
|
+ }
|
|
|
+ if money_2 == money_1*float64(10000) && money_2>=1000000000{
|
|
|
+ isRatio = true
|
|
|
+ new_money = money_1
|
|
|
+ }
|
|
|
+
|
|
|
+ if isRatio { //采用新值
|
|
|
+ for k,v := range moneyArr{
|
|
|
+ if v==new_money {
|
|
|
+ return true,moneyIndex[k]
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return false,0
|
|
|
+}
|
|
|
+
|
|
|
|
|
|
//分析抽取结果并保存
|
|
|
func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
@@ -1989,8 +2051,19 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
}
|
|
|
|
|
|
//预算-中标金额字段-特殊情况特殊处理
|
|
|
-
|
|
|
-
|
|
|
+ if k=="bidamount" || k=="budget" {
|
|
|
+ b,index :=calculateAbnormalMoney(val)
|
|
|
+ if b {
|
|
|
+ new_v := val[index]
|
|
|
+ tmp[new_v.Field] = new_v.Value
|
|
|
+ fieldSource[new_v.Field] = map[string]interface{}{
|
|
|
+ "ext_type":new_v.Type,
|
|
|
+ "ext_from":new_v.ExtFrom,
|
|
|
+ }
|
|
|
+ tmp["is_dif_ratioMoney"] = true
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
for _, v := range val { //取第一个非负数,项目名称除外
|
|
|
//存0是否有效
|
|
@@ -2047,11 +2120,16 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
}
|
|
|
}
|
|
|
if qu.Float64All(tmp["budget"]) < tmpBudget {
|
|
|
- fieldSource["budget"] = map[string]interface{}{
|
|
|
- "ext_type":"",
|
|
|
- "ext_from":"package",
|
|
|
+ if tmpBudget == qu.Float64All(tmp["budget"])*float64(10000) &&
|
|
|
+ tmpBudget>=1000000000 && qu.Float64All(tmp["budget"])>0{
|
|
|
+ tmp["is_dif_ratioMoney"] = true
|
|
|
+ }else {
|
|
|
+ fieldSource["budget"] = map[string]interface{}{
|
|
|
+ "ext_type":"",
|
|
|
+ "ext_from":"package",
|
|
|
+ }
|
|
|
+ tmp["budget"] = tmpBudget
|
|
|
}
|
|
|
- tmp["budget"] = tmpBudget
|
|
|
}
|
|
|
if qu.Float64All(tmp["agencyfee"]) < tmpAgencyfee {
|
|
|
fieldSource["agencyfee"] = map[string]interface{}{
|
|
@@ -2067,11 +2145,16 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
}
|
|
|
tmp["bidamount"] = tmpBidamount
|
|
|
} else if qu.Float64All(tmp["bidamount"]) < tmpBidamount {
|
|
|
- fieldSource["bidamount"] = map[string]interface{}{
|
|
|
- "ext_type":"",
|
|
|
- "ext_from":"package",
|
|
|
+ if tmpBidamount == qu.Float64All(tmp["bidamount"])*float64(10000) &&
|
|
|
+ tmpBidamount>=1000000000 && qu.Float64All(tmp["bidamount"])>0{
|
|
|
+ tmp["is_dif_ratioMoney"] = true
|
|
|
+ }else {
|
|
|
+ fieldSource["bidamount"] = map[string]interface{}{
|
|
|
+ "ext_type":"",
|
|
|
+ "ext_from":"package",
|
|
|
+ }
|
|
|
+ tmp["bidamount"] = tmpBidamount
|
|
|
}
|
|
|
- tmp["bidamount"] = tmpBidamount
|
|
|
}
|
|
|
} else {
|
|
|
//包数等于1,tmp没有值取包里的值
|
|
@@ -2201,8 +2284,10 @@ func AnalysisSaveResult(j, jf *ju.Job, e *ExtractTask) {
|
|
|
|
|
|
//添加字段来源
|
|
|
tmp["field_source"] = fieldSource
|
|
|
- //添加字段来源
|
|
|
-
|
|
|
+ //是否为不规则表格字段
|
|
|
+ if j.IsUnRulesTab {
|
|
|
+ tmp["is_UnRules_Tab"]= j.IsUnRulesTab
|
|
|
+ }
|
|
|
for k, v := range *doc {
|
|
|
if utf8.RuneCountInString(qu.ObjToString(v)) > 100000 {
|
|
|
(*doc)[k] = []rune(qu.ObjToString(v))[:100000]
|