init.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. package main
  2. import (
  3. "log"
  4. "math"
  5. mu "mfw/util"
  6. "qfw/util"
  7. "regexp"
  8. "sort"
  9. "strings"
  10. "sync"
  11. "go.mongodb.org/mongo-driver/bson/primitive"
  12. )
  13. const (
  14. ProjectCache = "info" //存放每条项目信息,key为项目ID
  15. )
  16. var (
  17. Sysconfig map[string]interface{} //读取配置文件
  18. MongoTool *MongodbSim //mongodb连接
  19. ExtractColl, ProjectColl, BackupColl, SiteColl string //抽取表、项目表、项目快照表、站点表
  20. Thread int //配置项线程数
  21. //NextNode []interface{}
  22. BlackList []interface{}
  23. BlaskListMap map[string]bool
  24. )
  25. var (
  26. //判断是日期
  27. _datereg = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
  28. _numreg1 = regexp.MustCompile("^[0-9-]{1,8}$")
  29. _zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
  30. _nzreg = regexp.MustCompile("^[0-9a-zA-Z-]+$")
  31. _hanreg = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
  32. replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
  33. //判断带有分包、等特定词的
  34. pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
  35. //判断包含数值
  36. nreg1 = regexp.MustCompile("[0-9]{2,}")
  37. //判断包含字母
  38. zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
  39. //判断包含汉字
  40. hreg1 = regexp.MustCompile(`[\p{Han}]+`)
  41. //判断项目编号是在10以内的纯数字结构
  42. numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
  43. //仅初始化使用
  44. compareNoPass = map[string]bool{}
  45. compareAB = map[string]bool{}
  46. compareAB2D = map[string]bool{}
  47. compareABD = map[string]bool{}
  48. compareAB2CD = map[string]bool{}
  49. compareABCD = map[string]bool{}
  50. )
  51. func init() {
  52. util.ReadConfig(&Sysconfig)
  53. MongoTool = &MongodbSim{
  54. MongodbAddr: Sysconfig["mongodbServers"].(string),
  55. Size: util.IntAll(Sysconfig["mongodbPoolSize"]),
  56. DbName: Sysconfig["mongodbName"].(string),
  57. }
  58. MongoTool.InitPool()
  59. ExtractColl = Sysconfig["extractColl"].(string)
  60. ProjectColl = Sysconfig["projectColl"].(string)
  61. BackupColl = Sysconfig["projectColl"].(string) + "_back"
  62. SiteColl = Sysconfig["siteColl"].(string)
  63. Thread = util.IntAll(Sysconfig["thread"])
  64. //NextNode = Sysconfig["nextNode"].([]interface{})
  65. udpport, _ := Sysconfig["udpport"].(string)
  66. udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
  67. udpclient.Listen(processUdpMsg)
  68. log.Println("Udp服务监听", udpport)
  69. BlackList = Sysconfig["rp_blacklist"].([]interface{})
  70. BlaskListMap = make(map[string]bool)
  71. for _, v := range BlackList {
  72. BlaskListMap[util.ObjToString(v)] = true
  73. }
  74. initWinnerRegexp()
  75. initBuyerRegexp()
  76. initAgencyRegexp()
  77. //加载项目数据
  78. //---不能通过
  79. vm := []string{"C", "D"}
  80. for i := 0; i < 2; i++ {
  81. for j := 0; j < 2; j++ {
  82. for k := 0; k < 2; k++ {
  83. key := vm[i] + vm[j] + vm[k]
  84. compareNoPass[key] = true
  85. //fmt.Println(key)
  86. }
  87. }
  88. }
  89. //fmt.Println("-------------------")
  90. //三个元素一致 [AB][AB][AB],分值最高
  91. vm = []string{"A", "B"}
  92. for i := 0; i < 2; i++ {
  93. for j := 0; j < 2; j++ {
  94. for k := 0; k < 2; k++ {
  95. key := vm[i] + vm[j] + vm[k]
  96. compareAB[key] = true
  97. //fmt.Println(key)
  98. }
  99. }
  100. }
  101. //fmt.Println("-------------------", len(compareAB))
  102. //---至少两个一致,其他可能不存在
  103. //[AB][AB][ABD]
  104. //[AB][ABD][AB]
  105. vm = []string{"A", "B"}
  106. vm2 := []string{"A", "B", "D"}
  107. for i := 0; i < 2; i++ {
  108. for j := 0; j < 2; j++ {
  109. for k := 0; k < 3; k++ {
  110. key := vm[i] + vm[j] + vm2[k]
  111. if !compareAB[key] {
  112. compareAB2D[key] = true
  113. //fmt.Println(key)
  114. }
  115. }
  116. }
  117. }
  118. for i := 0; i < 2; i++ {
  119. for j := 0; j < 3; j++ {
  120. for k := 0; k < 2; k++ {
  121. key := vm[i] + vm2[j] + vm[k]
  122. if !compareAB[key] {
  123. compareAB2D[key] = true
  124. //fmt.Println(key)
  125. }
  126. }
  127. }
  128. }
  129. //fmt.Println("-------------------", len(compareAB2D))
  130. //---至少一个一致,其他可能不存在
  131. //[ABD][ABD][ABD] //已经删除DDD
  132. vm = []string{"A", "B", "D"}
  133. for i := 0; i < 3; i++ {
  134. for j := 0; j < 3; j++ {
  135. for k := 0; k < 3; k++ {
  136. key := vm[i] + vm[j] + vm[k]
  137. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  138. compareABD[key] = true
  139. //fmt.Println(key)
  140. }
  141. }
  142. }
  143. }
  144. //fmt.Println("-------------------", len(compareABD))
  145. //[AB][ABCD][AB]
  146. //[AB][AB][ABCD]
  147. vm = []string{"A", "B"}
  148. vm2 = []string{"A", "B", "C", "D"}
  149. for i := 0; i < 2; i++ {
  150. for j := 0; j < 4; j++ {
  151. for k := 0; k < 2; k++ {
  152. key := vm[i] + vm2[j] + vm[k]
  153. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  154. compareAB2CD[key] = true
  155. //fmt.Println(key)
  156. }
  157. }
  158. }
  159. }
  160. for i := 0; i < 2; i++ {
  161. for j := 0; j < 2; j++ {
  162. for k := 0; k < 4; k++ {
  163. key := vm[i] + vm[j] + vm2[k]
  164. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  165. compareAB2CD[key] = true
  166. //fmt.Println(key)
  167. }
  168. }
  169. }
  170. }
  171. //fmt.Println("-------------------", len(compareAB2CD))
  172. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  173. vm = []string{"A", "B", "C", "D"}
  174. for i := 0; i < 4; i++ {
  175. for j := 0; j < 4; j++ {
  176. for k := 0; k < 4; k++ {
  177. key := vm[i] + vm[j] + vm[k]
  178. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  179. compareABCD[key] = true
  180. //fmt.Println(key)
  181. }
  182. }
  183. }
  184. }
  185. }
  186. func CheckHanAndNum(str string) (b bool) {
  187. return nreg1.MatchString(str) && hreg1.MatchString(str)
  188. }
  189. func CheckZimuAndNum(str string) (b bool) {
  190. return zreg1.MatchString(str) && nreg1.MatchString(str)
  191. }
  192. type KeyMap struct {
  193. Lock sync.Mutex
  194. Map map[string]*Key
  195. }
  196. type ID struct {
  197. Id string
  198. Lock sync.Mutex
  199. P *ProjectInfo
  200. }
  201. type Key struct {
  202. Arr []string
  203. Lock sync.Mutex
  204. }
  205. type IdAndLock struct {
  206. Id string
  207. Lock sync.Mutex
  208. }
  209. func NewKeyMap() *KeyMap {
  210. return &KeyMap{
  211. Map: map[string]*Key{},
  212. Lock: sync.Mutex{},
  213. }
  214. }
  215. //招标信息实体类
  216. type Info struct {
  217. Id string `json:"_id"`
  218. Href string `json:"href"` //源地址
  219. Publishtime int64 `json:"publishtime"`
  220. Comeintime int64 `json:"comeintime"`
  221. Title string `json:"title"`
  222. TopType string `json:"toptype"`
  223. SubType string `json:"subtype"`
  224. ProjectName string `json:"projectname"`
  225. ProjectCode string `json:"projectcode"`
  226. ProjectScope string `json:"projectscope"`
  227. ContractCode string `json:"contractcode"`
  228. Buyer string `json:"buyer"`
  229. Buyerperson string `json:"buyerperson"`
  230. Buyertel string `json:"buyertel"`
  231. Agency string `json:"agency"`
  232. Area string `json:"area"`
  233. City string `json:"city"`
  234. District string `json:"district"`
  235. Infoformat int `json:"infoformat"`
  236. ReviewExperts []string `json:"review_experts"`
  237. Purchasing string `json:"purchasing"`
  238. WinnerOrder []map[string]interface{} `json:"winnerorder"`
  239. HasPackage bool // `json:"haspackage"`
  240. Package map[string]interface{} `json:"package"`
  241. //PNum string `json:"pnum"`
  242. Topscopeclass []string `json:"topscopeclass"`
  243. Subscopeclass []string `json:"subscopeclass"`
  244. Buyerclass string `json:"buyerclass"`
  245. Bidopentime int64 `json:"bidopentime"`
  246. Budget float64 `json:"budget"`
  247. Bidamount float64 `json:"bidamount"`
  248. Winners []string
  249. dealtype int
  250. PTC string //从标题中抽的项目编号
  251. pnbval int //项目名称、编号、采购单位存在的个数
  252. LenPC int //项目编号长度
  253. LenPN int //项目名称长度
  254. LenPTC int //标题抽的项目编号长度
  255. //以下三个元素做对比,计算包含时候使用
  256. PNBH int //0初始,+包含,-被包含
  257. PCBH int
  258. PTCBH int
  259. }
  260. //项目实体类
  261. type ProjectInfo struct {
  262. Id primitive.ObjectID `json:"_id"`
  263. FirstTime int64 `json:"firsttime,omitempty"` //项目的最早时间
  264. LastTime int64 `json:"lasttime,omitempty"` //项目的最后时间
  265. Ids []string `json:"ids,omitempty"`
  266. Topscopeclass []string `json:"topscopeclass,omitempty"`
  267. Subscopeclass []string `json:"subscopeclass,omitempty"` //子行业分类
  268. Winners []string `json:"s_winner,omitempty"` //中标人
  269. ProjectName string `json:"projectname,omitempty"` //项目名称
  270. ProjectCode string `json:"projectcode,omitempty"` //项目代码唯一(纯数字的权重低)
  271. ContractCode string `json:"contractcode,omitempty"` //项目编号
  272. Buyer string `json:"buyer,omitempty"` //采购单位唯一
  273. MPN []string `json:"mpn,omitempty"` //合并后多余的项目名称
  274. MPC []string `json:"mpc,omitempty"` //合并后多余的项目编号
  275. Buyerperson string `json:"buyerperson"` //采购联系人
  276. Buyertel string `json:"buyertel"` //采购联系人电话
  277. Agency string `json:"agency"` //代理机构
  278. Area string `json:"area"` //地区
  279. City string `json:"city"` //地市
  280. District string `json:"district"` //区县
  281. Bidstatus string `json:"bidstatus"` //
  282. Bidtype string `json:"bidtype"` //
  283. ReviewExperts []string `json:"review_experts"` // 项目评审专家
  284. Purchasing string `json:"purchasing"` // 标的物
  285. //HasPackage bool `json:"haspackage"` //是否有分包
  286. Package map[string]interface{} `json:"package,omitempty"` //分包的对比对象
  287. Buyerclass string `json:"buyerclass"` //采购单位分类
  288. Bidopentime int64 `json:"bidopentime,omitempty"` //开标时间
  289. // Zbtime int64 `json:"zbtime"` //招标时间
  290. Jgtime int64 `json:"jgtime"` //结果中标时间
  291. Zbtime int64 `json:"zbtime"` //招标时间
  292. Bidamount float64 `json:"bidamount,omitempty"` //中标金额
  293. Budget float64 `json:"budget,omitempty"` //预算
  294. Winnerorder []string `json:"winnerorder"` //中标候选人
  295. score int
  296. comStr string
  297. resVal, pjVal int
  298. InfoFiled map[string]InfoField `json:"infofield"` //逻辑处理需要的info字段
  299. Budgettag int `json:"budgettag"` //预算是否有效标记
  300. Bidamounttag int `json:"bidamounttag"` //中标金额是否有效标记
  301. }
  302. //存储部分招标信息字段,业务逻辑处理需要
  303. type InfoField struct {
  304. Budget float64 `json:"budget"`
  305. Bidamount float64 `json:"bidamount"`
  306. ContractCode string `json:"contractcode"`
  307. ProjectName string `json:"projectname"`
  308. ProjectCode string `json:"projectcode"`
  309. Bidstatus string `json:"bidstatus"`
  310. }
  311. //站点信息
  312. type Site struct {
  313. Id string `json:"_id"`
  314. Site string `json:"site"` //站点名字
  315. Area string `json:"area"` //省
  316. City string `json:"city"` //市
  317. District string `json:"district"` //区、县
  318. Domain string `json:"domain"` //地址
  319. Status int `json:"status"` //
  320. }
  321. //二分字符串查找
  322. func BinarySearch(s []string, k string) int {
  323. sort.Strings(s)
  324. lo, hi := 0, len(s)-1
  325. for lo <= hi {
  326. m := (lo + hi) >> 1
  327. if s[m] < k {
  328. lo = m + 1
  329. } else if s[m] > k {
  330. hi = m - 1
  331. } else {
  332. return m
  333. }
  334. }
  335. return -1
  336. }
  337. //计算文本相似度
  338. func CosineSimilar(srcWords1, dstWords1 string) float64 {
  339. srcWords, dstWords := strings.Split(srcWords1, ""), strings.Split(dstWords1, "")
  340. // get all words
  341. allWordsMap := make(map[string]int, 0)
  342. for _, word := range srcWords {
  343. if _, found := allWordsMap[word]; !found {
  344. allWordsMap[word] = 1
  345. } else {
  346. allWordsMap[word] += 1
  347. }
  348. }
  349. for _, word := range dstWords {
  350. if _, found := allWordsMap[word]; !found {
  351. allWordsMap[word] = 1
  352. } else {
  353. allWordsMap[word] += 1
  354. }
  355. }
  356. // stable the sort
  357. allWordsSlice := make([]string, 0)
  358. for word, _ := range allWordsMap {
  359. allWordsSlice = append(allWordsSlice, word)
  360. }
  361. // assemble vector
  362. srcVector := make([]int, len(allWordsSlice))
  363. dstVector := make([]int, len(allWordsSlice))
  364. for _, word := range srcWords {
  365. if index := BinarySearch(allWordsSlice, word); index != -1 {
  366. srcVector[index] += 1
  367. }
  368. }
  369. for _, word := range dstWords {
  370. if index := BinarySearch(allWordsSlice, word); index != -1 {
  371. dstVector[index] += 1
  372. }
  373. }
  374. // calc cos
  375. numerator := float64(0)
  376. srcSq := 0
  377. dstSq := 0
  378. for i, srcCount := range srcVector {
  379. dstCount := dstVector[i]
  380. numerator += float64(srcCount * dstCount)
  381. srcSq += srcCount * srcCount
  382. dstSq += dstCount * dstCount
  383. }
  384. denominator := math.Sqrt(float64(srcSq * dstSq))
  385. v1 := numerator / denominator
  386. // if v1 > 0.6 {
  387. // log.Println(v1, srcWords1, dstWords1)
  388. // }
  389. return v1
  390. }
  391. func initWinnerRegexp() {
  392. winRegMap := Sysconfig["winner"].(map[string]interface{})
  393. preRegexps := winRegMap["pre_regexp"].([]interface{})
  394. backRegexps := winRegMap["back_regexp"].([]interface{})
  395. backRepRegexps := winRegMap["back_rep_regexp"].([]interface{})
  396. backBlack := winRegMap["blacklist"].([]interface{})
  397. var winPreRegexps []*regexp.Regexp
  398. for _, v := range preRegexps {
  399. reg := regexp.MustCompile("^" + v.(string))
  400. winPreRegexps = append(winPreRegexps, reg)
  401. }
  402. PreRegexp["winner"] = winPreRegexps
  403. var winBackRegexps []*regexp.Regexp
  404. for _, v := range backRegexps {
  405. reg := regexp.MustCompile(v.(string))
  406. winBackRegexps = append(winBackRegexps, reg)
  407. }
  408. BackRegexp["winner"] = winBackRegexps
  409. var winBackRepRegexps []RegexpInfo
  410. for _, v := range backRepRegexps {
  411. reps := strings.Split(v.(string), "#")
  412. if len(reps) > 1 {
  413. reg := RegexpInfo{
  414. regs: regexp.MustCompile(reps[0]),
  415. repstr: reps[1],
  416. }
  417. winBackRepRegexps = append(winBackRepRegexps, reg)
  418. }
  419. }
  420. BackRepRegexp["winner"] = winBackRepRegexps
  421. var winBlackRegexps []*regexp.Regexp
  422. for _, v := range backBlack {
  423. reg := regexp.MustCompile(v.(string))
  424. winBlackRegexps = append(winBlackRegexps, reg)
  425. }
  426. BlackRegexp["winner"] = winBlackRegexps
  427. }
  428. func initBuyerRegexp() {
  429. buyRegMap := Sysconfig["buyer"].(map[string]interface{})
  430. preRegexps := buyRegMap["pre_regexp"].([]interface{})
  431. backRegexps := buyRegMap["back_regexp"].([]interface{})
  432. backRepRegexps := buyRegMap["back_rep_regexp"].([]interface{})
  433. backBlack := buyRegMap["blacklist"].([]interface{})
  434. var winPreRegexps []*regexp.Regexp
  435. for _, v := range preRegexps {
  436. reg := regexp.MustCompile("^" + v.(string))
  437. winPreRegexps = append(winPreRegexps, reg)
  438. }
  439. PreRegexp["buyer"] = winPreRegexps
  440. var winBackRegexps []*regexp.Regexp
  441. for _, v := range backRegexps {
  442. reg := regexp.MustCompile(v.(string))
  443. winBackRegexps = append(winBackRegexps, reg)
  444. }
  445. BackRegexp["buyer"] = winBackRegexps
  446. var winBackRepRegexps []RegexpInfo
  447. for _, v := range backRepRegexps {
  448. reps := strings.Split(v.(string), "#")
  449. if len(reps) > 1 {
  450. reg := RegexpInfo{
  451. regs: regexp.MustCompile(reps[0]),
  452. repstr: reps[1],
  453. }
  454. winBackRepRegexps = append(winBackRepRegexps, reg)
  455. }
  456. }
  457. BackRepRegexp["buyer"] = winBackRepRegexps
  458. var winBlackRegexps []*regexp.Regexp
  459. for _, v := range backBlack {
  460. reg := regexp.MustCompile(v.(string))
  461. winBlackRegexps = append(winBlackRegexps, reg)
  462. }
  463. BlackRegexp["buyer"] = winBlackRegexps
  464. }
  465. func initAgencyRegexp() {
  466. buyRegMap := Sysconfig["agency"].(map[string]interface{})
  467. preRegexps := buyRegMap["pre_regexp"].([]interface{})
  468. backRegexps := buyRegMap["back_regexp"].([]interface{})
  469. backRepRegexps := buyRegMap["back_rep_regexp"].([]interface{})
  470. backBlack := buyRegMap["blacklist"].([]interface{})
  471. var winPreRegexps []*regexp.Regexp
  472. for _, v := range preRegexps {
  473. reg := regexp.MustCompile("^" + v.(string))
  474. winPreRegexps = append(winPreRegexps, reg)
  475. }
  476. PreRegexp["agency"] = winPreRegexps
  477. var winBackRegexps []*regexp.Regexp
  478. for _, v := range backRegexps {
  479. reg := regexp.MustCompile(v.(string))
  480. winBackRegexps = append(winBackRegexps, reg)
  481. }
  482. BackRegexp["agency"] = winBackRegexps
  483. var winBackRepRegexps []RegexpInfo
  484. for _, v := range backRepRegexps {
  485. reps := strings.Split(v.(string), "#")
  486. if len(reps) > 1 {
  487. reg := RegexpInfo{
  488. regs: regexp.MustCompile(reps[0]),
  489. repstr: reps[1],
  490. }
  491. winBackRepRegexps = append(winBackRepRegexps, reg)
  492. }
  493. }
  494. BackRepRegexp["agency"] = winBackRepRegexps
  495. var winBlackRegexps []*regexp.Regexp
  496. for _, v := range backBlack {
  497. reg := regexp.MustCompile(v.(string))
  498. winBlackRegexps = append(winBlackRegexps, reg)
  499. }
  500. BlackRegexp["agency"] = winBlackRegexps
  501. }