|
@@ -14,7 +14,7 @@ var cleanNameReg_3 = regexp.MustCompile("(公开|的)(比选|招标|单一来源
|
|
|
|
|
|
var un_cleanNameReg_1 = regexp.MustCompile("(项目[一二三四五六七八九1-9][次](招标|中标|中标结果|成交|候选人|竞谈|竞争性磋商)(公告)?)$")
|
|
|
|
|
|
-//完善判重数据检测-前置条件
|
|
|
+// 完善判重数据检测-前置条件
|
|
|
func convertArabicNumeralsAndLetters(data string) string {
|
|
|
newData := data
|
|
|
res1, _ := regexp.Compile("[a-zA-Z]+")
|
|
@@ -33,7 +33,7 @@ func convertArabicNumeralsAndLetters(data string) string {
|
|
|
return newData
|
|
|
}
|
|
|
|
|
|
-//特殊词处理
|
|
|
+// 特殊词处理
|
|
|
func dealWithSpecialPhrases(str1 string, str2 string) (string, string) {
|
|
|
newStr1 := str1
|
|
|
newStr2 := str2
|
|
@@ -47,7 +47,7 @@ func dealWithSpecialPhrases(str1 string, str2 string) (string, string) {
|
|
|
return newStr1, newStr2
|
|
|
}
|
|
|
|
|
|
-//关键词数量v
|
|
|
+// 关键词数量v
|
|
|
func dealWithSpecialWordNumber(info *Info, v *Info) int {
|
|
|
okNum := 0
|
|
|
if info.titleSpecialWord || info.specialWord {
|
|
@@ -59,7 +59,7 @@ func dealWithSpecialWordNumber(info *Info, v *Info) int {
|
|
|
return okNum
|
|
|
}
|
|
|
|
|
|
-//关键词再次判断
|
|
|
+// 关键词再次判断
|
|
|
func againRepeat(v *Info, info *Info, site bool) bool {
|
|
|
if isPublishtimeInterval(info.publishtime, v.publishtime) && site {
|
|
|
return true
|
|
@@ -93,7 +93,7 @@ func againRepeat(v *Info, info *Info, site bool) bool {
|
|
|
return false
|
|
|
}
|
|
|
|
|
|
-//均含有关键词再次判断
|
|
|
+// 均含有关键词再次判断
|
|
|
func againContainSpecialWord(v *Info, info *Info) bool {
|
|
|
if isBidopentimeInterval(info.bidopentime, v.bidopentime) {
|
|
|
return true
|
|
@@ -121,7 +121,7 @@ func againContainSpecialWord(v *Info, info *Info) bool {
|
|
|
return false
|
|
|
}
|
|
|
|
|
|
-//提取标题-标段号处理
|
|
|
+// 提取标题-标段号处理
|
|
|
func dealTitleSpecial(title1 string, title2 string) bool {
|
|
|
regular1 := "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789]+[))]?"
|
|
|
regular2 := "[0-9a-zA-Z一二三四五六七八九十零123456789]+(包|标段|标包)"
|
|
@@ -155,7 +155,7 @@ func dealTitleSpecial(title1 string, title2 string) bool {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-//删除中标单位字符串中多余的空格(含tab)
|
|
|
+// 删除中标单位字符串中多余的空格(含tab)
|
|
|
func deleteExtraSpaceName(s string) string {
|
|
|
//删除字符串中的多余空格,有多个空格时,仅保留一个空格
|
|
|
s1 := strings.Replace(s, " ", " ", -1) //替换tab为空格
|
|
@@ -171,7 +171,7 @@ func deleteExtraSpaceName(s string) string {
|
|
|
return string(s2)
|
|
|
}
|
|
|
|
|
|
-//中标金额倍率:10000
|
|
|
+// 中标金额倍率:10000
|
|
|
func isBidWinningAmount(f1 float64, f2 float64) bool {
|
|
|
if f1 == f2 || f1*10000 == f2 || f2*10000 == f1 {
|
|
|
return false
|
|
@@ -179,7 +179,7 @@ func isBidWinningAmount(f1 float64, f2 float64) bool {
|
|
|
return true
|
|
|
}
|
|
|
|
|
|
-//时间间隔周期
|
|
|
+// 时间间隔周期
|
|
|
func isTimeIntervalPeriod(i1 int64, i2 int64) bool {
|
|
|
if math.Abs(float64(i1-i2)) < 172800.0 {
|
|
|
return true
|
|
@@ -188,7 +188,7 @@ func isTimeIntervalPeriod(i1 int64, i2 int64) bool {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-//开标时间区间为一天
|
|
|
+// 开标时间区间为一天
|
|
|
func isBidopentimeInterval(i1 int64, i2 int64) bool {
|
|
|
if i1 == 0 || i2 == 0 {
|
|
|
return false
|
|
@@ -209,7 +209,7 @@ func isBidopentimeInterval(i1 int64, i2 int64) bool {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-//发布时间区间为一天
|
|
|
+// 发布时间区间为一天
|
|
|
func isPublishtimeInterval(i1 int64, i2 int64) bool {
|
|
|
if i1 == 0 || i2 == 0 {
|
|
|
return false
|
|
@@ -230,7 +230,7 @@ func isPublishtimeInterval(i1 int64, i2 int64) bool {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-//时间区间为一天
|
|
|
+// 时间区间为一天
|
|
|
func isTheSameDay(i1 int64, i2 int64) bool {
|
|
|
if i1 == 0 || i2 == 0 {
|
|
|
return false
|
|
@@ -244,7 +244,7 @@ func isTheSameDay(i1 int64, i2 int64) bool {
|
|
|
return false
|
|
|
}
|
|
|
|
|
|
-//前置0 五要素均相等认为重复
|
|
|
+// 前置0 五要素均相等认为重复
|
|
|
func leadingElementSame(v *Info, info *Info) bool {
|
|
|
isok := 0
|
|
|
if info.projectname != "" && v.projectname == info.projectname {
|
|
@@ -288,7 +288,7 @@ func leadingElementSame(v *Info, info *Info) bool {
|
|
|
return false
|
|
|
}
|
|
|
|
|
|
-//前置0 竞品要素简易计算
|
|
|
+// 前置0 竞品要素简易计算
|
|
|
func jingPinElementSame(v *Info, info *Info) bool {
|
|
|
if info.projectname != "" && v.projectname != info.projectname {
|
|
|
return false
|
|
@@ -305,7 +305,7 @@ func jingPinElementSame(v *Info, info *Info) bool {
|
|
|
return true
|
|
|
}
|
|
|
|
|
|
-//buyer的优先级
|
|
|
+// buyer的优先级
|
|
|
func buyerIsContinue(v *Info, info *Info) bool {
|
|
|
if !isTheSameDay(info.publishtime, v.publishtime) {
|
|
|
return true
|
|
@@ -327,7 +327,7 @@ func buyerIsContinue(v *Info, info *Info) bool {
|
|
|
return false
|
|
|
}
|
|
|
|
|
|
-//判断~是否需要替换数据相关
|
|
|
+// 判断~是否需要替换数据相关
|
|
|
func judgeIsReplaceInfo(s_href string, i_href string) bool {
|
|
|
if strings.Contains(s_href, "https://www.jianyu360.cn") && i_href != "" &&
|
|
|
!strings.Contains(i_href, "https://www.jianyu360.cn") {
|
|
@@ -336,7 +336,7 @@ func judgeIsReplaceInfo(s_href string, i_href string) bool {
|
|
|
return false
|
|
|
}
|
|
|
|
|
|
-//查询抽取表数据
|
|
|
+// 查询抽取表数据
|
|
|
func confrimExtractData(source_id string, info_id string) (bool, map[string]interface{}, map[string]interface{}) {
|
|
|
source_data := map[string]interface{}{}
|
|
|
info_data := map[string]interface{}{}
|
|
@@ -353,7 +353,7 @@ func confrimExtractData(source_id string, info_id string) (bool, map[string]inte
|
|
|
return isvalid, info_data, source_data
|
|
|
}
|
|
|
|
|
|
-//查询历史抽取表数据
|
|
|
+// 查询历史抽取表数据
|
|
|
func confrimHistoryExtractData(source_id string, info_id string) (bool, bool, map[string]interface{}, map[string]interface{}) {
|
|
|
source_data := map[string]interface{}{}
|
|
|
info_data := map[string]interface{}{}
|
|
@@ -376,7 +376,7 @@ func confrimHistoryExtractData(source_id string, info_id string) (bool, bool, ma
|
|
|
return isvalid, isexists, info_data, source_data
|
|
|
}
|
|
|
|
|
|
-//查询bidding表数据
|
|
|
+// 查询bidding表数据
|
|
|
func confrimBiddingData(source_id string, info_id string) (bool, map[string]interface{}, map[string]interface{}) {
|
|
|
source_data := map[string]interface{}{}
|
|
|
info_data := map[string]interface{}{}
|
|
@@ -393,7 +393,7 @@ func confrimBiddingData(source_id string, info_id string) (bool, map[string]inte
|
|
|
return isvalid, info_data, source_data
|
|
|
}
|
|
|
|
|
|
-//是否为竞品链接
|
|
|
+// 是否为竞品链接
|
|
|
func IsJpHref(href string) bool {
|
|
|
if strings.Contains(href, "www.jianyu360") && href != "" {
|
|
|
return true
|
|
@@ -401,7 +401,7 @@ func IsJpHref(href string) bool {
|
|
|
return false
|
|
|
}
|
|
|
|
|
|
-//验证竞品是否重复
|
|
|
+// 验证竞品是否重复
|
|
|
func confirmJingPinIsRepeatData(v *Info, info *Info) bool {
|
|
|
//标题验证~是否有关联~是否需要清洗数据-长度需要考虑
|
|
|
if v.c_title != "" && info.c_title != "" { //标题相似判断
|
|
@@ -433,7 +433,7 @@ func confirmJingPinIsRepeatData(v *Info, info *Info) bool {
|
|
|
return false
|
|
|
}
|
|
|
|
|
|
-//通用清洗~清洗名称~过滤冗余~
|
|
|
+// 通用清洗~清洗名称~过滤冗余~
|
|
|
func cleanNameFilterRedundant(name string) string {
|
|
|
new_name := name
|
|
|
new_name = cleanNameReg_0.ReplaceAllString(new_name, "")
|