123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366 |
- package src5
- import (
- "math"
- qutil "qfw/util"
- "regexp"
- "strings"
- )
// convertArabicNumeralsAndLetters normalises a string before it is used in
// duplicate detection: every ASCII letter is upper-cased and every ASCII
// digit 0-9 is replaced by the matching Chinese numeral (零 一 二 ... 九).
// All other bytes, including the bytes of multi-byte UTF-8 sequences, are
// copied through unchanged.
//
// The conversion is a single pass over the bytes of data, so no regular
// expression has to be compiled on each call.
func convertArabicNumeralsAndLetters(data string) string {
	// Index i holds the Chinese numeral for the ASCII digit '0'+i.
	chineseDigits := [...]string{"零", "一", "二", "三", "四", "五", "六", "七", "八", "九"}

	out := make([]byte, 0, len(data))
	for i := 0; i < len(data); i++ {
		c := data[i]
		switch {
		case c >= 'a' && c <= 'z':
			// Bytes of multi-byte runes are all >= 0x80, so only real ASCII
			// letters reach this branch.
			out = append(out, c-'a'+'A')
		case c >= '0' && c <= '9':
			out = append(out, chineseDigits[c-'0']...)
		default:
			out = append(out, c)
		}
	}
	return string(out)
}
// dealWithSpecialPhrases rewrites every occurrence of the phrase "重新招标"
// to its short form "重招" in both inputs and returns the two rewritten
// strings in the same order. Inputs without the phrase come back unchanged.
//
// The phrase is a fixed literal, so a plain string replacement is used
// rather than compiling a regular expression on every call.
func dealWithSpecialPhrases(str1 string, str2 string) (string, string) {
	const (
		longForm  = "重新招标"
		shortForm = "重招"
	)
	return strings.Replace(str1, longForm, shortForm, -1),
		strings.Replace(str2, longForm, shortForm, -1)
}
- //关键词数量v
- func dealWithSpecialWordNumber(info*Info,v*Info) int {
- okNum:=0
- if info.titleSpecialWord || info.specialWord {
- okNum++
- }
- if v.titleSpecialWord || v.specialWord {
- okNum++
- }
- return okNum
- }
- //关键词再次判断
- func againRepeat(v *Info, info *Info ,site bool) bool {
- if isPublishtimeInterval(info.publishtime,v.publishtime) && site {
- return true
- }
- if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
- return true
- }
- if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
- return true
- }
- if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
- return true
- }
- if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
- return true
- }
- if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
- return true
- }
- if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
- return true
- }
- if v.title != info.title && v.title != "" && info.title != ""{
- if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
- return true
- }
- }
- if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
- return true
- }
- return false
- }
- //均含有关键词再次判断
- func againContainSpecialWord (v *Info, info *Info) bool {
- if isBidopentimeInterval(info.bidopentime,v.bidopentime) {
- return true
- }
- if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
- return true
- }
- if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
- return true
- }
- if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
- return true
- }
- if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
- return true
- }
- if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
- return true
- }
- //提取标题-标段号处理
- if dealTitleSpecial(v.title,info.title) {
- return true
- }
- return false
- }
- //提取标题-标段号处理
- func dealTitleSpecial(title1 string,title2 string) bool{
- regular1 := "(包|标段|标包)[((]?[0-9a-zA-Z一二三四五六七八九十零123456789]+[))]?"
- regular2 := "[0-9a-zA-Z一二三四五六七八九十零123456789]+(包|标段|标包)"
- regx1_1,_ := regexp.Compile(regular1)
- str1:=regx1_1.FindString(title1)
- if str1!="" {
- //log.Println("标题1,规则一提取:",str1)
- }else {
- regx1_2,_ := regexp.Compile(regular2)
- str1=regx1_2.FindString(title1)
- if str1!="" {
- //log.Println("标题1,规则二提取:",str1)
- }
- }
- regx2_1,_ := regexp.Compile(regular1)
- str2:=regx2_1.FindString(title2)
- if str2!="" {
- //log.Println("标题2,规则一提取:",str2)
- }else {
- regx2_2,_ := regexp.Compile(regular2)
- str2=regx2_2.FindString(title2)
- if str2!="" {
- //log.Println("标题2,规则二提取:",str2)
- }
- }
- //根据提取的结果,在进行清洗
- if str1!="" {
- str1 = deleteExtraSpace(str1)
- str1= strings.Replace(str1, "(", "", -1)
- str1= strings.Replace(str1, "(", "", -1)
- str1= strings.Replace(str1, ")", "", -1)
- str1= strings.Replace(str1, ")", "", -1)
- str1 = convertArabicNumeralsAndLetters(str1)
- }
- if str2!="" {
- str2 = deleteExtraSpace(str2)
- str2= strings.Replace(str2, "(", "", -1)
- str2= strings.Replace(str2, "(", "", -1)
- str2= strings.Replace(str2, ")", "", -1)
- str2= strings.Replace(str2, ")", "", -1)
- str2 = convertArabicNumeralsAndLetters(str2)
- }
- //log.Println("最终:",str1,str2)
- if str1!=str2 {
- //log.Println("不一致")
- return true
- }else {
- //log.Println("一致")
- return false
- }
- }
// deleteExtraSpace normalises whitespace in s (used for winner names):
// every tab is turned into a space, and every run of two or more whitespace
// bytes is reduced to the first byte of that run. Whitespace here means
// space, newline, carriage return and form feed (tabs have already become
// spaces), which is the set the regexp class \s matches.
//
// The tab is written as the escape "\t" so the replacement does not depend
// on an invisible literal character in the source, and the string is
// processed in one pass instead of being rescanned after every removed run.
func deleteExtraSpace(s string) string {
	out := make([]byte, 0, len(s))
	prevSpace := false
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == '\t' {
			c = ' '
		}
		isSpace := c == ' ' || c == '\n' || c == '\r' || c == '\f'
		if isSpace && prevSpace {
			// Inside a run: only its first byte is kept.
			continue
		}
		out = append(out, c)
		prevSpace = isSpace
	}
	return string(out)
}
// isBidWinningAmount reports whether two bid amounts should be treated as
// different. The amounts count as the same (result false) when they are
// equal or when one is exactly 10000 times the other; anything else yields
// true.
// NOTE(review): the 10000 factor presumably reconciles amounts recorded in
// different units; confirm with the data source.
func isBidWinningAmount(f1 float64, f2 float64) bool {
	const scale = 10000
	sameValue := f1 == f2
	scaledMatch := f1*scale == f2 || f2*scale == f1
	return !sameValue && !scaledMatch
}
// isTimeIntervalPeriod reports whether two timestamps are strictly less than
// 172800 apart (48 hours when the inputs are in seconds). A gap of exactly
// 172800 or more yields false.
func isTimeIntervalPeriod(i1 int64, i2 int64) bool {
	const maxGap = 172800.0 // 48 hours
	gap := math.Abs(float64(i1 - i2))
	return gap < maxGap
}
- //开标时间区间为一天
- func isBidopentimeInterval(i1 int64 ,i2 int64) bool {
- if i1==0||i2==0 {
- return false
- }
- //不在同一天-或者同一天间隔超过六小时,属于不相等返回true
- timeOne,timeTwo:=i1,i2
- day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
- day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
- if day1==day2 {
- //是否间隔超过十二小时
- if math.Abs(float64(i1-i2)) >43200.0 {
- return true
- }else {
- return false
- }
- }else {
- return true
- }
- }
- //发布时间区间为一天
- func isPublishtimeInterval(i1 int64 ,i2 int64) bool {
- if i1==0||i2==0 {
- return false
- }
- //不在同一天-或者同一天间隔超过12小时,属于不相等返回true
- timeOne,timeTwo:=i1,i2
- day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
- day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
- if day1==day2 {
- //是否间隔超过十二小时
- if math.Abs(float64(i1-i2)) >=43200.0 {
- return true
- }else {
- return false
- }
- }else {
- return true
- }
- }
- //开标时间区间为一天
- func isTheSameDay(i1 int64 ,i2 int64) bool {
- if i1==0||i2==0 {
- return false
- }
- timeOne,timeTwo:=i1,i2
- day1 := qutil.FormatDateByInt64(&timeOne, qutil.Date_yyyyMMdd)
- day2 := qutil.FormatDateByInt64(&timeTwo, qutil.Date_yyyyMMdd)
- if day1==day2 {
- return true
- }
- //if math.Abs(float64(i1-i2)) <=86400.0 {
- // return true
- //}
- return false
- }
- //前置0 五要素均相等认为重复
- func leadingElementSame(v *Info, info *Info) bool {
- isok:= 0
- if info.projectname != "" && v.projectname == info.projectname {
- isok++
- }
- if info.buyer != "" && v.buyer == info.buyer {
- isok++
- }
- if info.subtype == "合同" || info.subtype == "验收" || info.subtype == "违规" {
- if info.contractnumber != "" && v.contractnumber == info.contractnumber {
- isok++
- }
- }else {
- if info.projectcode != "" && v.projectcode == info.projectcode {
- isok++
- }
- }
- if info.title != "" && v.title == info.title {
- isok++
- }
- if v.agency == info.agency &&info.agency != "" {
- isok++
- }
- if v.winner == info.winner&&info.winner != "" {
- isok++
- }
- if isok>=5 {
- return true
- }
- return false
- }
- //buyer的优先级
- func buyerIsContinue(v *Info, info *Info) bool {
- if !isTheSameDay(info.publishtime,v.publishtime) {
- return true
- }
- if v.title != info.title && v.title != "" && info.title != ""{
- if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
- return true
- }
- }
- if v.projectname != info.projectname && v.projectname != "" && info.projectname != ""{
- return true
- }
- //if v.budget != info.budget && v.budget != 0 && info.budget != 0 {
- // return true
- //}
- //if isBidWinningAmount(v.bidamount,info.bidamount) && v.bidamount != 0 && info.bidamount != 0{
- // return true
- //}
- //if deleteExtraSpace(v.winner) != deleteExtraSpace(info.winner) && v.winner != "" && info.winner != "" {
- // return true
- //}
- if v.contractnumber != "" && info.contractnumber != "" && v.contractnumber != info.contractnumber {
- return true
- }
- if v.projectcode != "" && info.projectcode != "" && v.projectcode != info.projectcode {
- return true
- }
- return false
- }
// invalidData reports whether a record is invalid, meaning all four of the
// supplied values are empty strings. One non-empty value is enough for the
// record to be considered valid (result false).
func invalidData(d1 string, d2 string, d3 string, d4 string) bool {
	for _, field := range [...]string{d1, d2, d3, d4} {
		if field != "" {
			return false
		}
	}
	return true
}
|