method.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. package main
  2. import (
  3. log "github.com/donnie4w/go-logger/logger"
  4. "github.com/go-ego/gse"
  5. qu "qfw/util"
  6. "regexp"
  7. "strings"
  8. "unicode"
  9. "unicode/utf8"
  10. )
  11. //单位
  12. var specHeadReg *regexp.Regexp = regexp.MustCompile("^([a-zA-Z]{1,2}[\u4e00-\u9fa5]{6,}|某部|州|自治区|自治州|街道|名称|省|市|县|区|业绩|资格|中标|项目|预算单位)")
  13. var unHanHeadReg *regexp.Regexp = regexp.MustCompile("^([\u4e00-\u9fa5])")
  14. var unConReg *regexp.Regexp = regexp.MustCompile("(园|政府|集团|公司|有限|合伙|企|院|学|局|处|校)")
  15. var unEndReg *regexp.Regexp = regexp.MustCompile("^.*(公司|学(校)?|博物馆|联合社|合作社|监狱|办公厅|电视台|集团|机构|企业|办公室|委员会|实验室|联社|厂|场|院|所|店|小|台|中心|局|站|城|馆|厅|处|行|科|部|队|联合(会|体)|工作室)$")
  16. var unenableReg1 *regexp.Regexp = regexp.MustCompile("^([\u4e00-\u9fa5]{1,2}(责任|有限|有限股份|有限责任|实业)公司|.*(某部|先生|女士|小姐)|工程技术处)$")
  17. var unenableReg2 *regexp.Regexp = regexp.MustCompile("(\\?|?|单位|#|xxxx|\\*\\*|%|万元|设计企业|免费|代表|代码标识|盖电子|测试测试|删除|错误|吊销|注销|发起人|待清理|&#|护照号|身份证号|\" +\n\t\"法人|&nbsp|国家拨入|借款|积累资金|认股人|--|、|&|`|美元)")
  18. //分词
  19. var GSE *gse.Segmenter = &gse.Segmenter{}
  20. //编号
  21. var codeUnConReg *regexp.Regexp = regexp.MustCompile("(null|勘察|测试|设计|设备|项目|标段|工程|监理|范围|分包|月|日|天)")
  22. var codeUnLenReg *regexp.Regexp = regexp.MustCompile("([\u4e00-\u9fa5]{9,})")
  23. var classMoneyScope map[string]map[string]interface{}
  24. func init() {
  25. log.Debug("初始化,切词")
  26. GSE.LoadDict("./dictionary.txt")
  27. //t>d>p
  28. classMoneyScope = map[string]map[string]interface{}{
  29. "建筑工程": {"min":10000,"max":10000000000},
  30. "行政办公": {"min":100,"max":100000000},
  31. "医疗卫生": {"min":1000,"max":100000000},
  32. "服务采购": {"min":10,"max":100000000},
  33. "机械设备": {"min":1000,"max":1000000000},
  34. "水利水电": {"min":1000,"max":1000000000},
  35. "能源化工": {"min":1000,"max":1000000000},
  36. "弱电安防": {"min":1000,"max":1000000000},
  37. "信息技术": {"min":100,"max":100000000},
  38. "交通工程": {"min":1000,"max":10000000000},
  39. "市政设施": {"min":1000,"max":10000000000},
  40. "农林牧渔": {"min":100,"max":10000000},
  41. }
  42. }
  43. //行业金额校验
  44. func checkingClassMoney(money float64,class string) bool {
  45. data :=classMoneyScope[class]
  46. if data!=nil {
  47. min := qu.Float64All(data["min"])
  48. max := qu.Float64All(data["max"])
  49. if money>min && money<max {
  50. return true
  51. }
  52. }
  53. return false
  54. }
  55. //企业库检测
  56. func qyNameIsExistsQYXY(name string) bool{
  57. q := map[string]interface{}{
  58. "company_name": name,
  59. }
  60. data :=qy_mgo.FindOne(qy_coll_name,q)
  61. if data==nil || len(data)<2{
  62. return false
  63. }
  64. return true
  65. }
  66. //采购单位库
  67. func buyerNameIsExists(name string) bool{
  68. q := map[string]interface{}{
  69. "buyer_name": name,
  70. }
  71. data :=qy_mgo.FindOne("buyer_enterprise",q)
  72. if data==nil || len(data)<2{
  73. return false
  74. }
  75. return true
  76. }
  77. //包含非中文
  78. func isUnHan(str string) bool {
  79. var count int
  80. for _, v := range str {
  81. if !unicode.Is(unicode.Han, v) {
  82. count++
  83. break
  84. }
  85. }
  86. return count > 0
  87. }
  88. //是否含中文
  89. func isHan(str string) bool {
  90. var count int
  91. for _, v := range str {
  92. if unicode.Is(unicode.Han, v) {
  93. count++
  94. break
  95. }
  96. }
  97. return count > 0
  98. }
  99. //符号数量
  100. func isCharCount(str string) []int {
  101. //中文,英文,数字,其他
  102. c1,c2,c3,c4:=0,0,0,0
  103. for _, v := range str {
  104. if unicode.Is(unicode.Han, v) {
  105. c1++
  106. }else if unicode.IsLetter(v){
  107. c2++
  108. } else if unicode.IsNumber(v){
  109. c3++
  110. }else {
  111. c4++
  112. }
  113. }
  114. return []int{c1,c2,c3,c4}
  115. }
  116. //中文比例-1:3
  117. func isHanLenToLittle(str string) bool {
  118. var count int
  119. len := utf8.RuneCountInString(str)
  120. min_count := len/3
  121. for _, v := range str {
  122. if unicode.Is(unicode.Han, v) {
  123. count++
  124. if count>=min_count {
  125. return true
  126. }
  127. }
  128. }
  129. return false
  130. }
  131. //是否含字母数字
  132. func isAlphanumeric(str string) bool {
  133. var count int
  134. for _, v := range str {
  135. if unicode.IsNumber(v) || unicode.IsLetter(v) {
  136. count++
  137. break
  138. }
  139. }
  140. return count > 0
  141. }
  142. //连续数字
  143. func isRegTimeDateCode(str string) bool {
  144. reg:=`\d{8}`
  145. regx,_ := regexp.Compile(reg)
  146. if regx.FindString(str)!="" {
  147. return false
  148. }
  149. if utf8.RuneCountInString(str)==8 {
  150. return true
  151. }
  152. return false
  153. }
  154. //配置字段初始分
  155. func dealWithFieldSourceScore(source map[string]interface{}) map[string]int64 {
  156. fieldArr := []string{"buyer","s_winner","budget","bidamount","projectname","projectcode"}
  157. score := make(map[string]int64,0)
  158. for _,v := range fieldArr{
  159. score[v] = int64(100)
  160. }
  161. for _,key := range fieldArr {
  162. ext := *qu.ObjToMap(source[key])
  163. if ext!=nil{
  164. ext_from:=qu.ObjToString(ext["ext_from"])
  165. ext_type:=qu.ObjToString(ext["ext_type"])
  166. //规范ext_from
  167. ext_from = normalizedExtFromName(ext_from)
  168. if ext_from=="winnerorder" || ext_from=="package" ||
  169. ext_from=="jsondata" || ext_type=="" {
  170. dataLock.Lock()
  171. score[key] = qu.Int64All(Ext_From[ext_from])
  172. dataLock.Unlock()
  173. }else {
  174. dataLock.Lock()
  175. s := qu.Int64All(Ext_From[ext_from])+qu.Int64All(Ext_Type[ext_type])
  176. score[key] = s/2
  177. dataLock.Unlock()
  178. }
  179. }
  180. }
  181. return score
  182. }
  183. //规范-抽取来源字符串
  184. func normalizedExtFromName(str string) string {
  185. if strings.Contains(str,"order") {
  186. str = "winnerorder"
  187. }else if strings.Contains(str,"JsonData") {
  188. str = "jsondata"
  189. }else {
  190. }
  191. return str
  192. }