init.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. package main
  2. import (
  3. "log"
  4. "math"
  5. mu "mfw/util"
  6. "qfw/util"
  7. "regexp"
  8. "sort"
  9. "strings"
  10. "sync"
  11. "go.mongodb.org/mongo-driver/bson/primitive"
  12. )
// Package-level configuration and connection state. The values are assigned
// in init() from the loaded configuration.
var (
	Sysconfig map[string]interface{} // configuration read from the config file
	MongoTool, MgoBidding *MongodbSim // MongoDB connections
	ExtractColl, ProjectColl, BackupColl, SiteColl string // extraction, project, project-snapshot and site collection names
	Thread int // number of threads taken from the configuration
	BlackList []interface{} // raw "rp_blacklist" entries from the configuration
	BlaskListMap map[string]bool // set built from BlackList, keyed by each entry converted to a string (identifier is spelled "Blask" in this file)
)
// Precompiled regular expressions and the comparison lookup tables.
var (
	// Matches a date such as 20YY年M月D日 / 20YY-M-D-, optionally followed by an hour (时) part.
	_datereg = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
	// Whole string is 1-8 characters made only of digits and '-'.
	_numreg1 = regexp.MustCompile("^[0-9-]{1,8}$")
	// Whole string is 1-7 characters made only of ASCII letters and '-'.
	_zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
	// Whole string is made only of digits, ASCII letters and '-'.
	_nzreg = regexp.MustCompile("^[0-9a-zA-Z-]+$")
	// NOTE(review): this is a raw string, so `\\` is a regex-escaped backslash;
	// the character class therefore appears to end at the first `]` after
	// `\\[\\`, leaving the rest of the pattern outside the class. Confirm
	// whether `\[\]` was intended.
	_hanreg = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
	// Matches generic words and punctuation (e.g. 工程, 采购, 项目, brackets).
	replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
	// Detects specific words such as sub-package, section or batch markers.
	pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
	// Detects a numeric value (two or more consecutive digits).
	nreg1 = regexp.MustCompile("[0-9]{2,}")
	// Detects ASCII letters.
	zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
	// Detects Han characters.
	hreg1 = regexp.MustCompile(`[\p{Han}]+`)
	// Checks that a project code is 1-10 characters made only of digits and '-'.
	numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
	// Used only during initialization: lookup tables of three-letter keys
	// that init() fills in.
	compareNoPass = map[string]bool{}
	compareAB = map[string]bool{}
	compareAB2D = map[string]bool{}
	compareABD = map[string]bool{}
	compareAB2CD = map[string]bool{}
	compareABCD = map[string]bool{}
)
  47. func init() {
  48. util.ReadConfig(&Sysconfig)
  49. MongoTool = &MongodbSim{
  50. MongodbAddr: Sysconfig["mongodbServers"].(string),
  51. Size: util.IntAll(Sysconfig["mongodbPoolSize"]),
  52. DbName: Sysconfig["mongodbName"].(string),
  53. }
  54. MongoTool.InitPool()
  55. bidding, _ := Sysconfig["bidding"].(map[string]interface{})
  56. MgoBidding = &MongodbSim{
  57. MongodbAddr: bidding["addr"].(string),
  58. Size: util.IntAll(bidding["dbsize"]),
  59. DbName: bidding["dbname"].(string),
  60. }
  61. MgoBidding.InitPool()
  62. ExtractColl = Sysconfig["extractColl"].(string)
  63. ProjectColl = Sysconfig["projectColl"].(string)
  64. BackupColl = Sysconfig["projectColl"].(string) + "_back"
  65. SiteColl = Sysconfig["siteColl"].(string)
  66. Thread = util.IntAll(Sysconfig["thread"])
  67. //NextNode = Sysconfig["nextNode"].([]interface{})
  68. udpport, _ := Sysconfig["udpport"].(string)
  69. udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
  70. udpclient.Listen(processUdpMsg)
  71. log.Println("Udp服务监听", udpport)
  72. BlackList = Sysconfig["rp_blacklist"].([]interface{})
  73. BlaskListMap = make(map[string]bool)
  74. for _, v := range BlackList {
  75. BlaskListMap[util.ObjToString(v)] = true
  76. }
  77. initWinnerRegexp()
  78. initBuyerRegexp()
  79. initAgencyRegexp()
  80. //加载项目数据
  81. //---不能通过
  82. vm := []string{"C", "D"}
  83. for i := 0; i < 2; i++ {
  84. for j := 0; j < 2; j++ {
  85. for k := 0; k < 2; k++ {
  86. key := vm[i] + vm[j] + vm[k]
  87. compareNoPass[key] = true
  88. //fmt.Println(key)
  89. }
  90. }
  91. }
  92. //fmt.Println("-------------------")
  93. //三个元素一致 [AB][AB][AB],分值最高
  94. vm = []string{"A", "B"}
  95. for i := 0; i < 2; i++ {
  96. for j := 0; j < 2; j++ {
  97. for k := 0; k < 2; k++ {
  98. key := vm[i] + vm[j] + vm[k]
  99. compareAB[key] = true
  100. //fmt.Println(key)
  101. }
  102. }
  103. }
  104. //fmt.Println("-------------------", len(compareAB))
  105. //---至少两个一致,其他可能不存在
  106. //[AB][AB][ABD]
  107. //[AB][ABD][AB]
  108. vm = []string{"A", "B"}
  109. vm2 := []string{"A", "B", "D"}
  110. for i := 0; i < 2; i++ {
  111. for j := 0; j < 2; j++ {
  112. for k := 0; k < 3; k++ {
  113. key := vm[i] + vm[j] + vm2[k]
  114. if !compareAB[key] {
  115. compareAB2D[key] = true
  116. //fmt.Println(key)
  117. }
  118. }
  119. }
  120. }
  121. for i := 0; i < 2; i++ {
  122. for j := 0; j < 3; j++ {
  123. for k := 0; k < 2; k++ {
  124. key := vm[i] + vm2[j] + vm[k]
  125. if !compareAB[key] {
  126. compareAB2D[key] = true
  127. //fmt.Println(key)
  128. }
  129. }
  130. }
  131. }
  132. //fmt.Println("-------------------", len(compareAB2D))
  133. //---至少一个一致,其他可能不存在
  134. //[ABD][ABD][ABD] //已经删除DDD
  135. vm = []string{"A", "B", "D"}
  136. for i := 0; i < 3; i++ {
  137. for j := 0; j < 3; j++ {
  138. for k := 0; k < 3; k++ {
  139. key := vm[i] + vm[j] + vm[k]
  140. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  141. compareABD[key] = true
  142. //fmt.Println(key)
  143. }
  144. }
  145. }
  146. }
  147. //fmt.Println("-------------------", len(compareABD))
  148. //[AB][ABCD][AB]
  149. //[AB][AB][ABCD]
  150. vm = []string{"A", "B"}
  151. vm2 = []string{"A", "B", "C", "D"}
  152. for i := 0; i < 2; i++ {
  153. for j := 0; j < 4; j++ {
  154. for k := 0; k < 2; k++ {
  155. key := vm[i] + vm2[j] + vm[k]
  156. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  157. compareAB2CD[key] = true
  158. //fmt.Println(key)
  159. }
  160. }
  161. }
  162. }
  163. for i := 0; i < 2; i++ {
  164. for j := 0; j < 2; j++ {
  165. for k := 0; k < 4; k++ {
  166. key := vm[i] + vm[j] + vm2[k]
  167. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  168. compareAB2CD[key] = true
  169. //fmt.Println(key)
  170. }
  171. }
  172. }
  173. }
  174. //fmt.Println("-------------------", len(compareAB2CD))
  175. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  176. vm = []string{"A", "B", "C", "D"}
  177. for i := 0; i < 4; i++ {
  178. for j := 0; j < 4; j++ {
  179. for k := 0; k < 4; k++ {
  180. key := vm[i] + vm[j] + vm[k]
  181. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  182. compareABCD[key] = true
  183. //fmt.Println(key)
  184. }
  185. }
  186. }
  187. }
  188. }
  189. func CheckHanAndNum(str string) (b bool) {
  190. return nreg1.MatchString(str) && hreg1.MatchString(str)
  191. }
  192. func CheckZimuAndNum(str string) (b bool) {
  193. return zreg1.MatchString(str) && nreg1.MatchString(str)
  194. }
// KeyMap bundles a mutex with a map from string keys to *Key entries.
// NOTE(review): Lock is presumably meant to guard Map — confirm against callers.
type KeyMap struct {
	Lock sync.Mutex
	Map map[string]*Key
}
// ID associates a string identifier with a *ProjectInfo and carries its own mutex.
type ID struct {
	Id string
	Lock sync.Mutex
	P *ProjectInfo
}
// Key holds a list of strings together with its own mutex; it is the value
// type stored in KeyMap.Map.
type Key struct {
	Arr []string
	Lock sync.Mutex
}
// IdAndLock pairs a string identifier with a mutex.
type IdAndLock struct {
	Id string
	Lock sync.Mutex
}
  212. func NewKeyMap() *KeyMap {
  213. return &KeyMap{
  214. Map: map[string]*Key{},
  215. Lock: sync.Mutex{},
  216. }
  217. }
// Info is the bidding-announcement entity. Fields with json tags map to the
// stored document; the untagged and unexported fields at the end are working
// values.
type Info struct {
	Id string `json:"_id"`
	Href string `json:"href"` // source URL
	Publishtime int64 `json:"publishtime"`
	Comeintime int64 `json:"comeintime"`
	Title string `json:"title"`
	TopType string `json:"toptype"`
	SubType string `json:"subtype"`
	ProjectName string `json:"projectname"`
	ProjectCode string `json:"projectcode"`
	ProjectScope string `json:"projectscope"`
	ContractCode string `json:"contractcode"`
	Buyer string `json:"buyer"`
	Buyerperson string `json:"buyerperson"`
	Buyertel string `json:"buyertel"`
	Agency string `json:"agency"`
	Area string `json:"area"`
	City string `json:"city"`
	District string `json:"district"`
	Infoformat int `json:"infoformat"`
	ReviewExperts []string `json:"review_experts"`
	Purchasing string `json:"purchasing"`
	WinnerOrder []map[string]interface{} `json:"winnerorder"`
	ProjectScale string `json:"project_scale"`
	ProjectDuration int `json:"project_duration"`
	ProjectTimeUnit string `json:"project_timeunit"`
	ProjectStartDate int64 `json:"project_startdate"`
	ProjectCompleteDate int64 `json:"project_completedate"`
	Payway string `json:"payway"`
	ContractGuarantee bool `json:"contract_guarantee"`
	BidGuarantee bool `json:"bid_guarantee"`
	Qualifies []map[string]interface{} `json:"qualifies"`
	HasPackage bool // `json:"haspackage"`
	Package map[string]interface{} `json:"package"`
	//PNum string `json:"pnum"`
	Topscopeclass []string `json:"topscopeclass"`
	Subscopeclass []string `json:"subscopeclass"`
	Buyerclass string `json:"buyerclass"`
	Bidopentime int64 `json:"bidopentime"`
	Budget float64 `json:"budget"`
	Bidamount float64 `json:"bidamount"`
	Winners []string
	dealtype int
	PTC string // project code extracted from the title
	pnbval int // how many of project name, project code and buyer are present
	LenPC int // length of the project code
	LenPN int // length of the project name
	LenPTC int // length of the project code extracted from the title
	// The three fields below are used for comparison when computing containment.
	PNBH int // 0 = initial, positive = contains, negative = is contained
	PCBH int
	PTCBH int
}
// ProjectInfo is the project entity: one project with the fields merged
// from its bidding announcements.
type ProjectInfo struct {
	Id primitive.ObjectID `json:"_id"`
	FirstTime int64 `json:"firsttime,omitempty"` // earliest time of the project
	LastTime int64 `json:"lasttime,omitempty"` // latest time of the project
	Ids []string `json:"ids,omitempty"`
	Topscopeclass []string `json:"topscopeclass,omitempty"`
	Subscopeclass []string `json:"subscopeclass,omitempty"` // sub-industry classification
	Winners []string `json:"s_winner,omitempty"` // winning bidders
	ProjectName string `json:"projectname,omitempty"` // project name
	ProjectCode string `json:"projectcode,omitempty"` // project code, unique (purely numeric codes carry less weight)
	ContractCode string `json:"contractcode,omitempty"` // project number (as worded in the original comment)
	Buyer string `json:"buyer,omitempty"` // purchasing entity, unique
	MPN []string `json:"mpn,omitempty"` // extra project names left over after merging
	MPC []string `json:"mpc,omitempty"` // extra project codes left over after merging
	Buyerperson string `json:"buyerperson"` // buyer contact person
	Buyertel string `json:"buyertel"` // buyer contact phone
	Agency string `json:"agency"` // agency
	Area string `json:"area"` // region
	City string `json:"city"` // city
	District string `json:"district"` // district / county
	Bidstatus string `json:"bidstatus"` //
	Bidtype string `json:"bidtype"` //
	ReviewExperts []string `json:"review_experts"` // project review experts
	Purchasing string `json:"purchasing"` // subject matter of the procurement
	Package map[string]interface{} `json:"package,omitempty"` // comparison object for sub-packages
	Buyerclass string `json:"buyerclass"` // purchasing-entity classification
	Bidopentime int64 `json:"bidopentime,omitempty"` // bid opening time
	Jgtime int64 `json:"jgtime"` // result / award time
	Zbtime int64 `json:"zbtime"` // tender time
	Bidamount float64 `json:"bidamount,omitempty"` // winning bid amount
	Budget float64 `json:"budget,omitempty"` // budget
	Winnerorder []string `json:"winnerorder"` // winning candidates
	ProjectScale string `json:"project_scale"` // project scale
	ProjectDuration int `json:"project_duration"` // construction duration
	ProjectTimeunit string `json:"project_timeunit"` // unit of the construction duration
	ProjectStartDate int64 `json:"project_startdate"` // construction start date
	// NOTE(review): field name and tag are spelled "Projct"/"projct"; stored
	// data may depend on this spelling, so verify before renaming.
	ProjctCompleteDate int64 `json:"projct_completedate"` // completion date
	Payway string `json:"payway"` // payment method
	ContractGuarantee bool `json:"contract_guarantee"` // performance bond: whether supported/included
	BidGuarantee bool `json:"bid_guarantee"` // bid bond: whether supported/included
	Qualifies string `json:"qualifies"` // qualification requirements
	score int
	comStr string
	resVal, pjVal int
	InfoFiled map[string]InfoField `json:"infofield"` // info fields needed by the processing logic
	Budgettag int `json:"budgettag"` // flag: whether the budget is valid
	Bidamounttag int `json:"bidamounttag"` // flag: whether the winning bid amount is valid
}
// InfoField stores a subset of bidding-announcement fields that the business
// logic needs; it is the value type of ProjectInfo.InfoFiled.
type InfoField struct {
	Budget float64 `json:"budget"`
	Bidamount float64 `json:"bidamount"`
	ContractCode string `json:"contractcode"`
	ProjectName string `json:"projectname"`
	ProjectCode string `json:"projectcode"`
	Bidstatus string `json:"bidstatus"`
}
// Site holds site information.
type Site struct {
	Id string `json:"_id"`
	Site string `json:"site"` // site name
	Area string `json:"area"` // province
	City string `json:"city"` // city
	District string `json:"district"` // district / county
	Domain string `json:"domain"` // address
	Status int `json:"status"` //
}
  340. //二分字符串查找
  341. func BinarySearch(s []string, k string) int {
  342. sort.Strings(s)
  343. lo, hi := 0, len(s)-1
  344. for lo <= hi {
  345. m := (lo + hi) >> 1
  346. if s[m] < k {
  347. lo = m + 1
  348. } else if s[m] > k {
  349. hi = m - 1
  350. } else {
  351. return m
  352. }
  353. }
  354. return -1
  355. }
  356. //计算文本相似度
  357. func CosineSimilar(srcWords1, dstWords1 string) float64 {
  358. srcWords, dstWords := strings.Split(srcWords1, ""), strings.Split(dstWords1, "")
  359. // get all words
  360. allWordsMap := make(map[string]int, 0)
  361. for _, word := range srcWords {
  362. if _, found := allWordsMap[word]; !found {
  363. allWordsMap[word] = 1
  364. } else {
  365. allWordsMap[word] += 1
  366. }
  367. }
  368. for _, word := range dstWords {
  369. if _, found := allWordsMap[word]; !found {
  370. allWordsMap[word] = 1
  371. } else {
  372. allWordsMap[word] += 1
  373. }
  374. }
  375. // stable the sort
  376. allWordsSlice := make([]string, 0)
  377. for word, _ := range allWordsMap {
  378. allWordsSlice = append(allWordsSlice, word)
  379. }
  380. // assemble vector
  381. srcVector := make([]int, len(allWordsSlice))
  382. dstVector := make([]int, len(allWordsSlice))
  383. for _, word := range srcWords {
  384. if index := BinarySearch(allWordsSlice, word); index != -1 {
  385. srcVector[index] += 1
  386. }
  387. }
  388. for _, word := range dstWords {
  389. if index := BinarySearch(allWordsSlice, word); index != -1 {
  390. dstVector[index] += 1
  391. }
  392. }
  393. // calc cos
  394. numerator := float64(0)
  395. srcSq := 0
  396. dstSq := 0
  397. for i, srcCount := range srcVector {
  398. dstCount := dstVector[i]
  399. numerator += float64(srcCount * dstCount)
  400. srcSq += srcCount * srcCount
  401. dstSq += dstCount * dstCount
  402. }
  403. denominator := math.Sqrt(float64(srcSq * dstSq))
  404. v1 := numerator / denominator
  405. // if v1 > 0.6 {
  406. // log.Println(v1, srcWords1, dstWords1)
  407. // }
  408. return v1
  409. }
  410. func initWinnerRegexp() {
  411. winRegMap := Sysconfig["winner"].(map[string]interface{})
  412. preRegexps := winRegMap["pre_regexp"].([]interface{})
  413. backRegexps := winRegMap["back_regexp"].([]interface{})
  414. backRepRegexps := winRegMap["back_rep_regexp"].([]interface{})
  415. backBlack := winRegMap["blacklist"].([]interface{})
  416. var winPreRegexps []*regexp.Regexp
  417. for _, v := range preRegexps {
  418. reg := regexp.MustCompile("^" + v.(string))
  419. winPreRegexps = append(winPreRegexps, reg)
  420. }
  421. PreRegexp["winner"] = winPreRegexps
  422. var winBackRegexps []*regexp.Regexp
  423. for _, v := range backRegexps {
  424. reg := regexp.MustCompile(v.(string))
  425. winBackRegexps = append(winBackRegexps, reg)
  426. }
  427. BackRegexp["winner"] = winBackRegexps
  428. var winBackRepRegexps []RegexpInfo
  429. for _, v := range backRepRegexps {
  430. reps := strings.Split(v.(string), "#")
  431. if len(reps) > 1 {
  432. reg := RegexpInfo{
  433. regs: regexp.MustCompile(reps[0]),
  434. repstr: reps[1],
  435. }
  436. winBackRepRegexps = append(winBackRepRegexps, reg)
  437. }
  438. }
  439. BackRepRegexp["winner"] = winBackRepRegexps
  440. var winBlackRegexps []*regexp.Regexp
  441. for _, v := range backBlack {
  442. reg := regexp.MustCompile(v.(string))
  443. winBlackRegexps = append(winBlackRegexps, reg)
  444. }
  445. BlackRegexp["winner"] = winBlackRegexps
  446. }
  447. func initBuyerRegexp() {
  448. buyRegMap := Sysconfig["buyer"].(map[string]interface{})
  449. preRegexps := buyRegMap["pre_regexp"].([]interface{})
  450. backRegexps := buyRegMap["back_regexp"].([]interface{})
  451. backRepRegexps := buyRegMap["back_rep_regexp"].([]interface{})
  452. backBlack := buyRegMap["blacklist"].([]interface{})
  453. var winPreRegexps []*regexp.Regexp
  454. for _, v := range preRegexps {
  455. reg := regexp.MustCompile("^" + v.(string))
  456. winPreRegexps = append(winPreRegexps, reg)
  457. }
  458. PreRegexp["buyer"] = winPreRegexps
  459. var winBackRegexps []*regexp.Regexp
  460. for _, v := range backRegexps {
  461. reg := regexp.MustCompile(v.(string))
  462. winBackRegexps = append(winBackRegexps, reg)
  463. }
  464. BackRegexp["buyer"] = winBackRegexps
  465. var winBackRepRegexps []RegexpInfo
  466. for _, v := range backRepRegexps {
  467. reps := strings.Split(v.(string), "#")
  468. if len(reps) > 1 {
  469. reg := RegexpInfo{
  470. regs: regexp.MustCompile(reps[0]),
  471. repstr: reps[1],
  472. }
  473. winBackRepRegexps = append(winBackRepRegexps, reg)
  474. }
  475. }
  476. BackRepRegexp["buyer"] = winBackRepRegexps
  477. var winBlackRegexps []*regexp.Regexp
  478. for _, v := range backBlack {
  479. reg := regexp.MustCompile(v.(string))
  480. winBlackRegexps = append(winBlackRegexps, reg)
  481. }
  482. BlackRegexp["buyer"] = winBlackRegexps
  483. }
  484. func initAgencyRegexp() {
  485. buyRegMap := Sysconfig["agency"].(map[string]interface{})
  486. preRegexps := buyRegMap["pre_regexp"].([]interface{})
  487. backRegexps := buyRegMap["back_regexp"].([]interface{})
  488. backRepRegexps := buyRegMap["back_rep_regexp"].([]interface{})
  489. backBlack := buyRegMap["blacklist"].([]interface{})
  490. var winPreRegexps []*regexp.Regexp
  491. for _, v := range preRegexps {
  492. reg := regexp.MustCompile("^" + v.(string))
  493. winPreRegexps = append(winPreRegexps, reg)
  494. }
  495. PreRegexp["agency"] = winPreRegexps
  496. var winBackRegexps []*regexp.Regexp
  497. for _, v := range backRegexps {
  498. reg := regexp.MustCompile(v.(string))
  499. winBackRegexps = append(winBackRegexps, reg)
  500. }
  501. BackRegexp["agency"] = winBackRegexps
  502. var winBackRepRegexps []RegexpInfo
  503. for _, v := range backRepRegexps {
  504. reps := strings.Split(v.(string), "#")
  505. if len(reps) > 1 {
  506. reg := RegexpInfo{
  507. regs: regexp.MustCompile(reps[0]),
  508. repstr: reps[1],
  509. }
  510. winBackRepRegexps = append(winBackRepRegexps, reg)
  511. }
  512. }
  513. BackRepRegexp["agency"] = winBackRepRegexps
  514. var winBlackRegexps []*regexp.Regexp
  515. for _, v := range backBlack {
  516. reg := regexp.MustCompile(v.(string))
  517. winBlackRegexps = append(winBlackRegexps, reg)
  518. }
  519. BlackRegexp["agency"] = winBlackRegexps
  520. }