init.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. package main
  2. import (
  3. "log"
  4. "math"
  5. mu "mfw/util"
  6. "qfw/util"
  7. "regexp"
  8. "sort"
  9. "strings"
  10. "sync"
  11. "go.mongodb.org/mongo-driver/bson/primitive"
  12. )
  // Process-wide configuration and connections; every variable here is assigned in init().
  13. var (
  14. Sysconfig map[string]interface{} // configuration, filled by util.ReadConfig
  15. MongoTool, MgoBidding *MongodbSim // MongoDB connections
  16. ExtractColl, ProjectColl, BackupColl, SiteColl string // extraction, project, project-snapshot and site collections
  17. ExtractColl1 string // read from the "extractColl1" config key
  18. Thread int // thread count taken from the config
  19. BlackList []interface{} // raw entries of the "rp_blacklist" config key
  20. BlaskListMap map[string]bool // set built from the string form of each BlackList entry
  21. )
  // Pre-compiled regular expressions and the compare* key sets that init() fills in.
  22. var (
  23. // matches a date
  24. _datereg = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
  25. _numreg1 = regexp.MustCompile("^[0-9-]{1,8}$")
  26. _zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
  27. _nzreg = regexp.MustCompile("^[0-9a-zA-Z-]+$")
  // NOTE(review): in this raw string `\\[\\]` reads as an escaped backslash, "[",
  // another escaped backslash and then "]", which appears to close the character
  // class early so the rest is matched literally — confirm this is intended.
  28. _hanreg = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
  29. replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
  30. // matches specific words such as sub-package, lot or batch markers
  31. pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
  32. // matches text containing a number (two or more consecutive digits)
  33. nreg1 = regexp.MustCompile("[0-9]{2,}")
  34. // matches text containing letters
  35. zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
  36. // matches text containing Han characters
  37. hreg1 = regexp.MustCompile(`[\p{Han}]+`)
  38. // matches a project code made only of digits and hyphens, at most 10 characters long
  39. numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
  40. // key sets populated in init() (original note: used for initialization only)
  41. compareNoPass = map[string]bool{}
  42. compareAB = map[string]bool{}
  43. compareAB2D = map[string]bool{}
  44. compareABD = map[string]bool{}
  45. compareAB2CD = map[string]bool{}
  46. compareABCD = map[string]bool{}
  47. )
  48. func init() {
  49. util.ReadConfig(&Sysconfig)
  50. MongoTool = &MongodbSim{
  51. MongodbAddr: Sysconfig["mongodbServers"].(string),
  52. Size: util.IntAll(Sysconfig["mongodbPoolSize"]),
  53. DbName: Sysconfig["mongodbName"].(string),
  54. }
  55. MongoTool.InitPool()
  56. bidding, _ := Sysconfig["bidding"].(map[string]interface{})
  57. MgoBidding = &MongodbSim{
  58. MongodbAddr: bidding["addr"].(string),
  59. Size: util.IntAll(bidding["dbsize"]),
  60. DbName: bidding["dbname"].(string),
  61. }
  62. MgoBidding.InitPool()
  63. ExtractColl = Sysconfig["extractColl"].(string)
  64. ExtractColl1 = Sysconfig["extractColl1"].(string)
  65. ProjectColl = Sysconfig["projectColl"].(string)
  66. BackupColl = Sysconfig["projectColl"].(string) + "_back"
  67. SiteColl = Sysconfig["siteColl"].(string)
  68. Thread = util.IntAll(Sysconfig["thread"])
  69. //NextNode = Sysconfig["nextNode"].([]interface{})
  70. udpport, _ := Sysconfig["udpport"].(string)
  71. udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
  72. udpclient.Listen(processUdpMsg)
  73. log.Println("Udp服务监听", udpport)
  74. BlackList = Sysconfig["rp_blacklist"].([]interface{})
  75. BlaskListMap = make(map[string]bool)
  76. for _, v := range BlackList {
  77. BlaskListMap[util.ObjToString(v)] = true
  78. }
  79. initWinnerRegexp()
  80. initBuyerRegexp()
  81. initAgencyRegexp()
  82. //加载项目数据
  83. //---不能通过
  84. vm := []string{"C", "D"}
  85. for i := 0; i < 2; i++ {
  86. for j := 0; j < 2; j++ {
  87. for k := 0; k < 2; k++ {
  88. key := vm[i] + vm[j] + vm[k]
  89. compareNoPass[key] = true
  90. //fmt.Println(key)
  91. }
  92. }
  93. }
  94. //fmt.Println("-------------------")
  95. //三个元素一致 [AB][AB][AB],分值最高
  96. vm = []string{"A", "B"}
  97. for i := 0; i < 2; i++ {
  98. for j := 0; j < 2; j++ {
  99. for k := 0; k < 2; k++ {
  100. key := vm[i] + vm[j] + vm[k]
  101. compareAB[key] = true
  102. //fmt.Println(key)
  103. }
  104. }
  105. }
  106. //fmt.Println("-------------------", len(compareAB))
  107. //---至少两个一致,其他可能不存在
  108. //[AB][AB][ABD]
  109. //[AB][ABD][AB]
  110. vm = []string{"A", "B"}
  111. vm2 := []string{"A", "B", "D"}
  112. for i := 0; i < 2; i++ {
  113. for j := 0; j < 2; j++ {
  114. for k := 0; k < 3; k++ {
  115. key := vm[i] + vm[j] + vm2[k]
  116. if !compareAB[key] {
  117. compareAB2D[key] = true
  118. //fmt.Println(key)
  119. }
  120. }
  121. }
  122. }
  123. for i := 0; i < 2; i++ {
  124. for j := 0; j < 3; j++ {
  125. for k := 0; k < 2; k++ {
  126. key := vm[i] + vm2[j] + vm[k]
  127. if !compareAB[key] {
  128. compareAB2D[key] = true
  129. //fmt.Println(key)
  130. }
  131. }
  132. }
  133. }
  134. //fmt.Println("-------------------", len(compareAB2D))
  135. //---至少一个一致,其他可能不存在
  136. //[ABD][ABD][ABD] //已经删除DDD
  137. vm = []string{"A", "B", "D"}
  138. for i := 0; i < 3; i++ {
  139. for j := 0; j < 3; j++ {
  140. for k := 0; k < 3; k++ {
  141. key := vm[i] + vm[j] + vm[k]
  142. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  143. compareABD[key] = true
  144. //fmt.Println(key)
  145. }
  146. }
  147. }
  148. }
  149. //fmt.Println("-------------------", len(compareABD))
  150. //[AB][ABCD][AB]
  151. //[AB][AB][ABCD]
  152. vm = []string{"A", "B"}
  153. vm2 = []string{"A", "B", "C", "D"}
  154. for i := 0; i < 2; i++ {
  155. for j := 0; j < 4; j++ {
  156. for k := 0; k < 2; k++ {
  157. key := vm[i] + vm2[j] + vm[k]
  158. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  159. compareAB2CD[key] = true
  160. //fmt.Println(key)
  161. }
  162. }
  163. }
  164. }
  165. for i := 0; i < 2; i++ {
  166. for j := 0; j < 2; j++ {
  167. for k := 0; k < 4; k++ {
  168. key := vm[i] + vm[j] + vm2[k]
  169. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  170. compareAB2CD[key] = true
  171. //fmt.Println(key)
  172. }
  173. }
  174. }
  175. }
  176. //fmt.Println("-------------------", len(compareAB2CD))
  177. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  178. vm = []string{"A", "B", "C", "D"}
  179. for i := 0; i < 4; i++ {
  180. for j := 0; j < 4; j++ {
  181. for k := 0; k < 4; k++ {
  182. key := vm[i] + vm[j] + vm[k]
  183. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  184. compareABCD[key] = true
  185. //fmt.Println(key)
  186. }
  187. }
  188. }
  189. }
  190. }
  191. func CheckHanAndNum(str string) (b bool) {
  192. return nreg1.MatchString(str) && hreg1.MatchString(str)
  193. }
  194. func CheckZimuAndNum(str string) (b bool) {
  195. return zreg1.MatchString(str) && nreg1.MatchString(str)
  196. }
  // KeyMap pairs a map from string keys to *Key entries with a mutex.
  // NOTE(review): Lock presumably guards Map — verify against the callers.
  197. type KeyMap struct {
  198. Lock sync.Mutex
  199. Map map[string]*Key
  200. }
  // ID bundles a string id, a mutex and a pointer to a ProjectInfo.
  // NOTE(review): Lock presumably serialises access to P — verify against the callers.
  201. type ID struct {
  202. Id string
  203. Lock sync.Mutex
  204. P *ProjectInfo
  205. }
  // Key is the value type stored in KeyMap.Map: a string slice plus a mutex.
  // NOTE(review): Lock presumably guards Arr — verify against the callers.
  206. type Key struct {
  207. Arr []string
  208. Lock sync.Mutex
  209. }
  // IdAndLock couples a string id with its own mutex.
  210. type IdAndLock struct {
  211. Id string
  212. Lock sync.Mutex
  213. }
  214. func NewKeyMap() *KeyMap {
  215. return &KeyMap{
  216. Map: map[string]*Key{},
  217. Lock: sync.Mutex{},
  218. }
  219. }
  220. // Info is the tender (bidding) notice entity.
  221. type Info struct {
  222. Id string `json:"_id"`
  223. Href string `json:"href"` // source URL
  224. Publishtime int64 `json:"publishtime"`
  225. Comeintime int64 `json:"comeintime"`
  226. Title string `json:"title"`
  227. TopType string `json:"toptype"`
  228. SubType string `json:"subtype"`
  229. ProjectName string `json:"projectname"`
  230. ProjectCode string `json:"projectcode"`
  231. ProjectScope string `json:"projectscope"`
  232. ContractCode string `json:"contractcode"`
  233. Buyer string `json:"buyer"`
  234. Buyerperson string `json:"buyerperson"`
  235. Buyertel string `json:"buyertel"`
  236. Agency string `json:"agency"`
  237. Area string `json:"area"`
  238. City string `json:"city"`
  239. District string `json:"district"`
  240. Infoformat int `json:"infoformat"`
  241. ReviewExperts []string `json:"review_experts"`
  242. Purchasing string `json:"purchasing"`
  243. WinnerOrder []map[string]interface{} `json:"winnerorder"`
  244. ProjectScale string `json:"project_scale"`
  245. ProjectDuration int `json:"project_duration"`
  246. ProjectTimeUnit string `json:"project_timeunit"`
  247. ProjectStartDate int64 `json:"project_startdate"`
  248. ProjectCompleteDate int64 `json:"project_completedate"`
  249. Payway string `json:"payway"`
  250. ContractGuarantee bool `json:"contract_guarantee"`
  251. BidGuarantee bool `json:"bid_guarantee"`
  252. Qualifies []map[string]interface{} `json:"qualifies"`
  253. EntIdList []string `json:"entidlist"`
  254. HasPackage bool // `json:"haspackage"`
  255. Package map[string]interface{} `json:"package"`
  256. Topscopeclass []string `json:"topscopeclass"`
  257. Subscopeclass []string `json:"subscopeclass"`
  258. Buyerclass string `json:"buyerclass"`
  259. Bidopentime int64 `json:"bidopentime"`
  260. Budget float64 `json:"budget"`
  261. Bidamount float64 `json:"bidamount"`
  262. Winners []string
  263. dealtype int
  264. PTC string // project code extracted from the title
  265. pnbval int // how many of project name, project code and buyer are present
  266. LenPC int // length of the project code
  267. LenPN int // length of the project name
  268. LenPTC int // length of the project code extracted from the title
  269. // The next three fields are used for comparison when computing containment.
  270. PNBH int // 0 initial, + contains, - is contained
  271. PCBH int
  272. PTCBH int
  273. }
  274. // ProjectInfo is the project entity.
  275. type ProjectInfo struct {
  276. Id primitive.ObjectID `json:"_id"`
  277. FirstTime int64 `json:"firsttime,omitempty"` // earliest time of the project
  278. LastTime int64 `json:"lasttime,omitempty"` // latest time of the project
  279. Ids []string `json:"ids,omitempty"`
  280. Topscopeclass []string `json:"topscopeclass,omitempty"`
  281. Subscopeclass []string `json:"subscopeclass,omitempty"` // sub-industry classification
  282. Winners []string `json:"s_winner,omitempty"` // winning bidders
  283. ProjectName string `json:"projectname,omitempty"` // project name
  284. ProjectCode string `json:"projectcode,omitempty"` // project code, unique (purely numeric codes carry less weight)
  285. ContractCode string `json:"contractcode,omitempty"` // project number (per the original comment)
  286. Buyer string `json:"buyer,omitempty"` // purchasing entity, unique
  287. MPN []string `json:"mpn,omitempty"` // extra project names left over after merging
  288. MPC []string `json:"mpc,omitempty"` // extra project codes left over after merging
  289. Buyerperson string `json:"buyerperson"` // buyer contact person
  290. Buyertel string `json:"buyertel"` // buyer contact phone
  291. Agency string `json:"agency"` // agency
  292. Area string `json:"area"` // region
  293. City string `json:"city"` // city
  294. District string `json:"district"` // district / county
  295. Bidstatus string `json:"bidstatus"` //
  296. Bidtype string `json:"bidtype"` //
  297. ReviewExperts []string `json:"review_experts"` // project review experts
  298. Purchasing string `json:"purchasing"` // subject matter of the procurement
  299. Package map[string]interface{} `json:"package,omitempty"` // comparison object for sub-packages
  300. Buyerclass string `json:"buyerclass"` // buyer classification
  301. Bidopentime int64 `json:"bidopentime,omitempty"` // bid opening time
  302. Jgtime int64 `json:"jgtime"` // result (award) time
  303. Zbtime int64 `json:"zbtime"` // tender time
  304. Bidamount float64 `json:"bidamount,omitempty"` // winning bid amount
  305. Budget float64 `json:"budget,omitempty"` // budget
  306. Winnerorder []string `json:"winnerorder"` // winning candidates
  307. ProjectScale string `json:"project_scale"` // project scale
  308. ProjectDuration int `json:"project_duration"` // construction period length
  309. ProjectTimeunit string `json:"project_timeunit"` // unit of the construction period length
  310. ProjectStartDate int64 `json:"project_startdate"` // construction start date
  311. ProjctCompleteDate int64 `json:"projct_completedate"` // completion date
  312. Payway string `json:"payway"` // payment method
  313. ContractGuarantee bool `json:"contract_guarantee"` // performance bond: whether supported/included
  314. BidGuarantee bool `json:"bid_guarantee"` // bid bond: whether supported/included
  315. Qualifies string `json:"qualifies"` // qualification requirements
  316. EntIdList []string `json:"entidlist"` // enterprise ids
  317. score int
  318. comStr string
  319. resVal, pjVal int
  320. InfoFiled map[string]InfoField `json:"infofield"` // info fields needed by the business logic
  321. Budgettag int `json:"budgettag"` // flag marking whether the budget is valid
  322. Bidamounttag int `json:"bidamounttag"` // flag marking whether the winning bid amount is valid
  323. }
  324. // InfoField stores the subset of tender-notice fields needed by the business logic.
  325. type InfoField struct {
  326. Budget float64 `json:"budget"`
  327. Bidamount float64 `json:"bidamount"`
  328. ContractCode string `json:"contractcode"`
  329. ProjectName string `json:"projectname"`
  330. ProjectCode string `json:"projectcode"`
  331. Bidstatus string `json:"bidstatus"`
  332. }
  333. // Site holds site information.
  334. type Site struct {
  335. Id string `json:"_id"`
  336. Site string `json:"site"` // site name
  337. Area string `json:"area"` // province
  338. City string `json:"city"` // city
  339. District string `json:"district"` // district / county
  340. Domain string `json:"domain"` // address
  341. Status int `json:"status"` //
  342. }
  // BinarySearch looks k up in s with a binary search.
  //
  // s is sorted in place first, so the caller's slice is reordered as a side
  // effect and the returned index refers to the sorted order. The result is
  // the index of an element equal to k, or -1 when k is not present.
  func BinarySearch(s []string, k string) int {
  	sort.Strings(s)
  	low, high := 0, len(s)-1
  	for high >= low {
  		mid := low + (high-low)/2
  		switch cur := s[mid]; {
  		case cur == k:
  			return mid
  		case cur < k:
  			low = mid + 1
  		default:
  			high = mid - 1
  		}
  	}
  	return -1
  }
  // CosineSimilar computes the cosine similarity of two texts from their
  // per-character frequency vectors.
  //
  // Each text is split into its individual UTF-8 sequences with
  // strings.Split(text, ""). The result is the dot product of the two count
  // vectors divided by the square root of the product of their squared norms.
  //
  // It returns 0 when either text is empty (the quotient would otherwise be
  // 0/0, i.e. NaN), 1 for texts with identical character counts, and a value
  // in between otherwise.
  func CosineSimilar(srcWords1, dstWords1 string) float64 {
  	// Count the characters directly in maps instead of locating each one in
  	// a vocabulary slice that gets re-sorted on every lookup.
  	srcCounts := make(map[string]int)
  	for _, ch := range strings.Split(srcWords1, "") {
  		srcCounts[ch]++
  	}
  	dstCounts := make(map[string]int)
  	for _, ch := range strings.Split(dstWords1, "") {
  		dstCounts[ch]++
  	}

  	dot, srcSq, dstSq := 0, 0, 0
  	for ch, n := range srcCounts {
  		srcSq += n * n
  		// Characters absent from dst contribute 0 to the dot product.
  		dot += n * dstCounts[ch]
  	}
  	for _, n := range dstCounts {
  		dstSq += n * n
  	}

  	// An empty text has a zero vector; define the similarity as 0.
  	if srcSq == 0 || dstSq == 0 {
  		return 0
  	}
  	return float64(dot) / math.Sqrt(float64(srcSq*dstSq))
  }
  413. func initWinnerRegexp() {
  414. winRegMap := Sysconfig["winner"].(map[string]interface{})
  415. preRegexps := winRegMap["pre_regexp"].([]interface{})
  416. backRegexps := winRegMap["back_regexp"].([]interface{})
  417. backRepRegexps := winRegMap["back_rep_regexp"].([]interface{})
  418. backBlack := winRegMap["blacklist"].([]interface{})
  419. var winPreRegexps []*regexp.Regexp
  420. for _, v := range preRegexps {
  421. reg := regexp.MustCompile("^" + v.(string))
  422. winPreRegexps = append(winPreRegexps, reg)
  423. }
  424. PreRegexp["winner"] = winPreRegexps
  425. var winBackRegexps []*regexp.Regexp
  426. for _, v := range backRegexps {
  427. reg := regexp.MustCompile(v.(string))
  428. winBackRegexps = append(winBackRegexps, reg)
  429. }
  430. BackRegexp["winner"] = winBackRegexps
  431. var winBackRepRegexps []RegexpInfo
  432. for _, v := range backRepRegexps {
  433. reps := strings.Split(v.(string), "#")
  434. if len(reps) > 1 {
  435. reg := RegexpInfo{
  436. regs: regexp.MustCompile(reps[0]),
  437. repstr: reps[1],
  438. }
  439. winBackRepRegexps = append(winBackRepRegexps, reg)
  440. }
  441. }
  442. BackRepRegexp["winner"] = winBackRepRegexps
  443. var winBlackRegexps []*regexp.Regexp
  444. for _, v := range backBlack {
  445. reg := regexp.MustCompile(v.(string))
  446. winBlackRegexps = append(winBlackRegexps, reg)
  447. }
  448. BlackRegexp["winner"] = winBlackRegexps
  449. }
  450. func initBuyerRegexp() {
  451. buyRegMap := Sysconfig["buyer"].(map[string]interface{})
  452. preRegexps := buyRegMap["pre_regexp"].([]interface{})
  453. backRegexps := buyRegMap["back_regexp"].([]interface{})
  454. backRepRegexps := buyRegMap["back_rep_regexp"].([]interface{})
  455. backBlack := buyRegMap["blacklist"].([]interface{})
  456. var winPreRegexps []*regexp.Regexp
  457. for _, v := range preRegexps {
  458. reg := regexp.MustCompile("^" + v.(string))
  459. winPreRegexps = append(winPreRegexps, reg)
  460. }
  461. PreRegexp["buyer"] = winPreRegexps
  462. var winBackRegexps []*regexp.Regexp
  463. for _, v := range backRegexps {
  464. reg := regexp.MustCompile(v.(string))
  465. winBackRegexps = append(winBackRegexps, reg)
  466. }
  467. BackRegexp["buyer"] = winBackRegexps
  468. var winBackRepRegexps []RegexpInfo
  469. for _, v := range backRepRegexps {
  470. reps := strings.Split(v.(string), "#")
  471. if len(reps) > 1 {
  472. reg := RegexpInfo{
  473. regs: regexp.MustCompile(reps[0]),
  474. repstr: reps[1],
  475. }
  476. winBackRepRegexps = append(winBackRepRegexps, reg)
  477. }
  478. }
  479. BackRepRegexp["buyer"] = winBackRepRegexps
  480. var winBlackRegexps []*regexp.Regexp
  481. for _, v := range backBlack {
  482. reg := regexp.MustCompile(v.(string))
  483. winBlackRegexps = append(winBlackRegexps, reg)
  484. }
  485. BlackRegexp["buyer"] = winBlackRegexps
  486. }
  487. func initAgencyRegexp() {
  488. buyRegMap := Sysconfig["agency"].(map[string]interface{})
  489. preRegexps := buyRegMap["pre_regexp"].([]interface{})
  490. backRegexps := buyRegMap["back_regexp"].([]interface{})
  491. backRepRegexps := buyRegMap["back_rep_regexp"].([]interface{})
  492. backBlack := buyRegMap["blacklist"].([]interface{})
  493. var winPreRegexps []*regexp.Regexp
  494. for _, v := range preRegexps {
  495. reg := regexp.MustCompile("^" + v.(string))
  496. winPreRegexps = append(winPreRegexps, reg)
  497. }
  498. PreRegexp["agency"] = winPreRegexps
  499. var winBackRegexps []*regexp.Regexp
  500. for _, v := range backRegexps {
  501. reg := regexp.MustCompile(v.(string))
  502. winBackRegexps = append(winBackRegexps, reg)
  503. }
  504. BackRegexp["agency"] = winBackRegexps
  505. var winBackRepRegexps []RegexpInfo
  506. for _, v := range backRepRegexps {
  507. reps := strings.Split(v.(string), "#")
  508. if len(reps) > 1 {
  509. reg := RegexpInfo{
  510. regs: regexp.MustCompile(reps[0]),
  511. repstr: reps[1],
  512. }
  513. winBackRepRegexps = append(winBackRepRegexps, reg)
  514. }
  515. }
  516. BackRepRegexp["agency"] = winBackRepRegexps
  517. var winBlackRegexps []*regexp.Regexp
  518. for _, v := range backBlack {
  519. reg := regexp.MustCompile(v.(string))
  520. winBlackRegexps = append(winBlackRegexps, reg)
  521. }
  522. BlackRegexp["agency"] = winBlackRegexps
  523. }