// init.go (17 KB)
  1. package main
  2. import (
  3. "log"
  4. "math"
  5. mu "mfw/util"
  6. "qfw/util"
  7. "regexp"
  8. "sort"
  9. "strings"
  10. "sync"
  11. "go.mongodb.org/mongo-driver/bson/primitive"
  12. )
  13. const (
  14. ProjectCache = "info" //存放每条项目信息,key为项目ID
  15. )
  16. var (
  17. Sysconfig map[string]interface{} //读取配置文件
  18. MongoTool *MongodbSim //mongodb连接
  19. ExtractColl, ProjectColl, BackupColl, SiteColl string //抽取表、项目表、项目快照表、站点表
  20. UpdateColl string // 金额修改数据表
  21. Thread int //配置项线程数
  22. //NextNode []interface{}
  23. BlackList []interface{}
  24. BlaskListMap map[string]bool
  25. )
  26. var (
  27. //判断是日期
  28. _datereg = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
  29. _numreg1 = regexp.MustCompile("^[0-9-]{1,8}$")
  30. _zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
  31. _nzreg = regexp.MustCompile("^[0-9a-zA-Z-]+$")
  32. _hanreg = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
  33. replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
  34. //判断带有分包、等特定词的
  35. pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
  36. //判断包含数值
  37. nreg1 = regexp.MustCompile("[0-9]{2,}")
  38. //判断包含字母
  39. zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
  40. //判断包含汉字
  41. hreg1 = regexp.MustCompile(`[\p{Han}]+`)
  42. //判断项目编号是在10以内的纯数字结构
  43. numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
  44. //仅初始化使用
  45. compareNoPass = map[string]bool{}
  46. compareAB = map[string]bool{}
  47. compareAB2D = map[string]bool{}
  48. compareABD = map[string]bool{}
  49. compareAB2CD = map[string]bool{}
  50. compareABCD = map[string]bool{}
  51. )
  52. func init() {
  53. util.ReadConfig(&Sysconfig)
  54. MongoTool = &MongodbSim{
  55. MongodbAddr: Sysconfig["mongodbServers"].(string),
  56. Size: util.IntAll(Sysconfig["mongodbPoolSize"]),
  57. DbName: Sysconfig["mongodbName"].(string),
  58. }
  59. MongoTool.InitPool()
  60. ExtractColl = Sysconfig["extractColl"].(string)
  61. ProjectColl = Sysconfig["projectColl"].(string)
  62. UpdateColl = Sysconfig["updateColl"].(string)
  63. BackupColl = Sysconfig["projectColl"].(string) + "_back"
  64. SiteColl = Sysconfig["siteColl"].(string)
  65. Thread = util.IntAll(Sysconfig["thread"])
  66. //NextNode = Sysconfig["nextNode"].([]interface{})
  67. udpport, _ := Sysconfig["udpport"].(string)
  68. udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
  69. udpclient.Listen(processUdpMsg)
  70. log.Println("Udp服务监听", udpport)
  71. BlackList = Sysconfig["blacklist"].([]interface{})
  72. BlaskListMap = make(map[string]bool)
  73. for _, v := range BlackList {
  74. BlaskListMap[util.ObjToString(v)] = true
  75. }
  76. initWinnerRegexp()
  77. initBuyerRegexp()
  78. initAgencyRegexp()
  79. //加载项目数据
  80. //---不能通过
  81. vm := []string{"C", "D"}
  82. for i := 0; i < 2; i++ {
  83. for j := 0; j < 2; j++ {
  84. for k := 0; k < 2; k++ {
  85. key := vm[i] + vm[j] + vm[k]
  86. compareNoPass[key] = true
  87. //fmt.Println(key)
  88. }
  89. }
  90. }
  91. //fmt.Println("-------------------")
  92. //三个元素一致 [AB][AB][AB],分值最高
  93. vm = []string{"A", "B"}
  94. for i := 0; i < 2; i++ {
  95. for j := 0; j < 2; j++ {
  96. for k := 0; k < 2; k++ {
  97. key := vm[i] + vm[j] + vm[k]
  98. compareAB[key] = true
  99. //fmt.Println(key)
  100. }
  101. }
  102. }
  103. //fmt.Println("-------------------", len(compareAB))
  104. //---至少两个一致,其他可能不存在
  105. //[AB][AB][ABD]
  106. //[AB][ABD][AB]
  107. vm = []string{"A", "B"}
  108. vm2 := []string{"A", "B", "D"}
  109. for i := 0; i < 2; i++ {
  110. for j := 0; j < 2; j++ {
  111. for k := 0; k < 3; k++ {
  112. key := vm[i] + vm[j] + vm2[k]
  113. if !compareAB[key] {
  114. compareAB2D[key] = true
  115. //fmt.Println(key)
  116. }
  117. }
  118. }
  119. }
  120. for i := 0; i < 2; i++ {
  121. for j := 0; j < 3; j++ {
  122. for k := 0; k < 2; k++ {
  123. key := vm[i] + vm2[j] + vm[k]
  124. if !compareAB[key] {
  125. compareAB2D[key] = true
  126. //fmt.Println(key)
  127. }
  128. }
  129. }
  130. }
  131. //fmt.Println("-------------------", len(compareAB2D))
  132. //---至少一个一致,其他可能不存在
  133. //[ABD][ABD][ABD] //已经删除DDD
  134. vm = []string{"A", "B", "D"}
  135. for i := 0; i < 3; i++ {
  136. for j := 0; j < 3; j++ {
  137. for k := 0; k < 3; k++ {
  138. key := vm[i] + vm[j] + vm[k]
  139. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  140. compareABD[key] = true
  141. //fmt.Println(key)
  142. }
  143. }
  144. }
  145. }
  146. //fmt.Println("-------------------", len(compareABD))
  147. //[AB][ABCD][AB]
  148. //[AB][AB][ABCD]
  149. vm = []string{"A", "B"}
  150. vm2 = []string{"A", "B", "C", "D"}
  151. for i := 0; i < 2; i++ {
  152. for j := 0; j < 4; j++ {
  153. for k := 0; k < 2; k++ {
  154. key := vm[i] + vm2[j] + vm[k]
  155. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  156. compareAB2CD[key] = true
  157. //fmt.Println(key)
  158. }
  159. }
  160. }
  161. }
  162. for i := 0; i < 2; i++ {
  163. for j := 0; j < 2; j++ {
  164. for k := 0; k < 4; k++ {
  165. key := vm[i] + vm[j] + vm2[k]
  166. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  167. compareAB2CD[key] = true
  168. //fmt.Println(key)
  169. }
  170. }
  171. }
  172. }
  173. //fmt.Println("-------------------", len(compareAB2CD))
  174. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  175. vm = []string{"A", "B", "C", "D"}
  176. for i := 0; i < 4; i++ {
  177. for j := 0; j < 4; j++ {
  178. for k := 0; k < 4; k++ {
  179. key := vm[i] + vm[j] + vm[k]
  180. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  181. compareABCD[key] = true
  182. //fmt.Println(key)
  183. }
  184. }
  185. }
  186. }
  187. }
  188. func CheckHanAndNum(str string) (b bool) {
  189. return nreg1.MatchString(str) && hreg1.MatchString(str)
  190. }
  191. func CheckZimuAndNum(str string) (b bool) {
  192. return zreg1.MatchString(str) && nreg1.MatchString(str)
  193. }
  194. type KeyMap struct {
  195. Lock sync.Mutex
  196. Map map[string]*Key
  197. }
  198. type ID struct {
  199. Id string
  200. Lock sync.Mutex
  201. P *ProjectInfo
  202. }
  203. type Key struct {
  204. Arr []string
  205. Lock sync.Mutex
  206. }
  207. type IdAndLock struct {
  208. Id string
  209. Lock sync.Mutex
  210. }
  211. func NewKeyMap() *KeyMap {
  212. return &KeyMap{
  213. Map: map[string]*Key{},
  214. Lock: sync.Mutex{},
  215. }
  216. }
  217. //招标信息实体类
  218. type Info struct {
  219. Id string `json:"_id"`
  220. Href string `json:"href"` //源地址
  221. Publishtime int64 `json:"publishtime"`
  222. Comeintime int64 `json:"comeintime"`
  223. Title string `json:"title"`
  224. TopType string `json:"toptype"`
  225. SubType string `json:"subtype"`
  226. ProjectName string `json:"projectname"`
  227. ProjectCode string `json:"projectcode"`
  228. ProjectScope string `json:"projectscope"`
  229. ContractCode string `json:"contractcode"`
  230. Buyer string `json:"buyer"`
  231. Buyerperson string `json:"buyerperson"`
  232. Buyertel string `json:"buyertel"`
  233. Agency string `json:"agency"`
  234. Area string `json:"area"`
  235. City string `json:"city"`
  236. District string `json:"district"`
  237. Infoformat int `json:"infoformat"`
  238. ReviewExperts []string `json:"review_experts"`
  239. Purchasing string `json:"purchasing"`
  240. WinnerOrder []map[string]interface{} `json:"winnerorder"`
  241. HasPackage bool // `json:"haspackage"`
  242. Package map[string]interface{} `json:"package"`
  243. //PNum string `json:"pnum"`
  244. Topscopeclass []string `json:"topscopeclass"`
  245. Subscopeclass []string `json:"subscopeclass"`
  246. Buyerclass string `json:"buyerclass"`
  247. Bidopentime int64 `json:"bidopentime"`
  248. Budget float64 `json:"budget"`
  249. Bidamount float64 `json:"bidamount"`
  250. Winners []string
  251. dealtype int
  252. PTC string //从标题中抽的项目编号
  253. pnbval int //项目名称、编号、采购单位存在的个数
  254. LenPC int //项目编号长度
  255. LenPN int //项目名称长度
  256. LenPTC int //标题抽的项目编号长度
  257. //以下三个元素做对比,计算包含时候使用
  258. PNBH int //0初始,+包含,-被包含
  259. PCBH int
  260. PTCBH int
  261. }
  262. //项目实体类
  263. type ProjectInfo struct {
  264. Id primitive.ObjectID `json:"_id"`
  265. FirstTime int64 `json:"firsttime,omitempty"` //项目的最早时间
  266. LastTime int64 `json:"lasttime,omitempty"` //项目的最后时间
  267. Ids []string `json:"ids,omitempty"`
  268. Topscopeclass []string `json:"topscopeclass,omitempty"`
  269. Subscopeclass []string `json:"subscopeclass,omitempty"` //子行业分类
  270. Winners []string `json:"s_winner,omitempty"` //中标人
  271. ProjectName string `json:"projectname,omitempty"` //项目名称
  272. ProjectCode string `json:"projectcode,omitempty"` //项目代码唯一(纯数字的权重低)
  273. ContractCode string `json:"contractcode,omitempty"` //项目编号
  274. Buyer string `json:"buyer,omitempty"` //采购单位唯一
  275. MPN []string `json:"mpn,omitempty"` //合并后多余的项目名称
  276. MPC []string `json:"mpc,omitempty"` //合并后多余的项目编号
  277. Buyerperson string `json:"buyerperson"` //采购联系人
  278. Buyertel string `json:"buyertel"` //采购联系人电话
  279. Agency string `json:"agency"` //代理机构
  280. Area string `json:"area"` //地区
  281. City string `json:"city"` //地市
  282. District string `json:"district"` //区县
  283. Bidstatus string `json:"bidstatus"` //
  284. Bidtype string `json:"bidtype"` //
  285. ReviewExperts []string `json:"review_experts"` // 项目评审专家
  286. Purchasing string `json:"purchasing"` // 标的物
  287. //HasPackage bool `json:"haspackage"` //是否有分包
  288. Package map[string]interface{} `json:"package,omitempty"` //分包的对比对象
  289. Buyerclass string `json:"buyerclass"` //采购单位分类
  290. Bidopentime int64 `json:"bidopentime,omitempty"` //开标时间
  291. // Zbtime int64 `json:"zbtime"` //招标时间
  292. Jgtime int64 `json:"jgtime"` //结果中标时间
  293. Zbtime int64 `json:"zbtime"` //招标时间
  294. Bidamount float64 `json:"bidamount,omitempty"` //中标金额
  295. Budget float64 `json:"budget,omitempty"` //预算
  296. Winnerorder []string `json:"winnerorder"` //中标候选人
  297. score int
  298. comStr string
  299. resVal, pjVal int
  300. InfoFiled map[string]InfoField `json:"infofield"` //逻辑处理需要的info字段
  301. Budgettag int `json:"budgettag"` //预算是否有效标记
  302. Bidamounttag int `json:"bidamounttag"` //中标金额是否有效标记
  303. }
  304. //存储部分招标信息字段,业务逻辑处理需要
  305. type InfoField struct {
  306. Budget float64 `json:"budget"`
  307. Bidamount float64 `json:"bidamount"`
  308. ContractCode string `json:"contractcode"`
  309. ProjectName string `json:"projectname"`
  310. ProjectCode string `json:"projectcode"`
  311. Bidstatus string `json:"bidstatus"`
  312. }
  313. //站点信息
  314. type Site struct {
  315. Id string `json:"_id"`
  316. Site string `json:"site"` //站点名字
  317. Area string `json:"area"` //省
  318. City string `json:"city"` //市
  319. District string `json:"district"` //区、县
  320. Domain string `json:"domain"` //地址
  321. Status int `json:"status"` //
  322. }
  323. //二分字符串查找
  324. func BinarySearch(s []string, k string) int {
  325. sort.Strings(s)
  326. lo, hi := 0, len(s)-1
  327. for lo <= hi {
  328. m := (lo + hi) >> 1
  329. if s[m] < k {
  330. lo = m + 1
  331. } else if s[m] > k {
  332. hi = m - 1
  333. } else {
  334. return m
  335. }
  336. }
  337. return -1
  338. }
  339. //计算文本相似度
  340. func CosineSimilar(srcWords1, dstWords1 string) float64 {
  341. srcWords, dstWords := strings.Split(srcWords1, ""), strings.Split(dstWords1, "")
  342. // get all words
  343. allWordsMap := make(map[string]int, 0)
  344. for _, word := range srcWords {
  345. if _, found := allWordsMap[word]; !found {
  346. allWordsMap[word] = 1
  347. } else {
  348. allWordsMap[word] += 1
  349. }
  350. }
  351. for _, word := range dstWords {
  352. if _, found := allWordsMap[word]; !found {
  353. allWordsMap[word] = 1
  354. } else {
  355. allWordsMap[word] += 1
  356. }
  357. }
  358. // stable the sort
  359. allWordsSlice := make([]string, 0)
  360. for word, _ := range allWordsMap {
  361. allWordsSlice = append(allWordsSlice, word)
  362. }
  363. // assemble vector
  364. srcVector := make([]int, len(allWordsSlice))
  365. dstVector := make([]int, len(allWordsSlice))
  366. for _, word := range srcWords {
  367. if index := BinarySearch(allWordsSlice, word); index != -1 {
  368. srcVector[index] += 1
  369. }
  370. }
  371. for _, word := range dstWords {
  372. if index := BinarySearch(allWordsSlice, word); index != -1 {
  373. dstVector[index] += 1
  374. }
  375. }
  376. // calc cos
  377. numerator := float64(0)
  378. srcSq := 0
  379. dstSq := 0
  380. for i, srcCount := range srcVector {
  381. dstCount := dstVector[i]
  382. numerator += float64(srcCount * dstCount)
  383. srcSq += srcCount * srcCount
  384. dstSq += dstCount * dstCount
  385. }
  386. denominator := math.Sqrt(float64(srcSq * dstSq))
  387. v1 := numerator / denominator
  388. // if v1 > 0.6 {
  389. // log.Println(v1, srcWords1, dstWords1)
  390. // }
  391. return v1
  392. }
  393. func initWinnerRegexp() {
  394. winRegMap := Sysconfig["winner"].(map[string]interface{})
  395. preRegexps := winRegMap["pre_regexp"].([]interface{})
  396. backRegexps := winRegMap["back_regexp"].([]interface{})
  397. backRepRegexps := winRegMap["back_rep_regexp"].([]interface{})
  398. backBlack := winRegMap["blacklist"].([]interface{})
  399. var winPreRegexps []*regexp.Regexp
  400. for _, v := range preRegexps {
  401. reg := regexp.MustCompile("^" + v.(string))
  402. winPreRegexps = append(winPreRegexps, reg)
  403. }
  404. PreRegexp["winner"] = winPreRegexps
  405. var winBackRegexps []*regexp.Regexp
  406. for _, v := range backRegexps {
  407. reg := regexp.MustCompile(v.(string))
  408. winBackRegexps = append(winBackRegexps, reg)
  409. }
  410. BackRegexp["winner"] = winBackRegexps
  411. var winBackRepRegexps []RegexpInfo
  412. for _, v := range backRepRegexps {
  413. reps := strings.Split(v.(string), "#")
  414. if len(reps) > 1 {
  415. reg := RegexpInfo{
  416. regs: regexp.MustCompile(reps[0]),
  417. repstr: reps[1],
  418. }
  419. winBackRepRegexps = append(winBackRepRegexps, reg)
  420. }
  421. }
  422. BackRepRegexp["winner"] = winBackRepRegexps
  423. var winBlackRegexps []*regexp.Regexp
  424. for _, v := range backBlack {
  425. reg := regexp.MustCompile(v.(string))
  426. winBlackRegexps = append(winBlackRegexps, reg)
  427. }
  428. BlackRegexp["winner"] = winBlackRegexps
  429. }
  430. func initBuyerRegexp() {
  431. buyRegMap := Sysconfig["buyer"].(map[string]interface{})
  432. preRegexps := buyRegMap["pre_regexp"].([]interface{})
  433. backRegexps := buyRegMap["back_regexp"].([]interface{})
  434. backRepRegexps := buyRegMap["back_rep_regexp"].([]interface{})
  435. backBlack := buyRegMap["blacklist"].([]interface{})
  436. var winPreRegexps []*regexp.Regexp
  437. for _, v := range preRegexps {
  438. reg := regexp.MustCompile("^" + v.(string))
  439. winPreRegexps = append(winPreRegexps, reg)
  440. }
  441. PreRegexp["buyer"] = winPreRegexps
  442. var winBackRegexps []*regexp.Regexp
  443. for _, v := range backRegexps {
  444. reg := regexp.MustCompile(v.(string))
  445. winBackRegexps = append(winBackRegexps, reg)
  446. }
  447. BackRegexp["buyer"] = winBackRegexps
  448. var winBackRepRegexps []RegexpInfo
  449. for _, v := range backRepRegexps {
  450. reps := strings.Split(v.(string), "#")
  451. if len(reps) > 1 {
  452. reg := RegexpInfo{
  453. regs: regexp.MustCompile(reps[0]),
  454. repstr: reps[1],
  455. }
  456. winBackRepRegexps = append(winBackRepRegexps, reg)
  457. }
  458. }
  459. BackRepRegexp["buyer"] = winBackRepRegexps
  460. var winBlackRegexps []*regexp.Regexp
  461. for _, v := range backBlack {
  462. reg := regexp.MustCompile(v.(string))
  463. winBlackRegexps = append(winBlackRegexps, reg)
  464. }
  465. BlackRegexp["buyer"] = winBlackRegexps
  466. }
  467. func initAgencyRegexp() {
  468. buyRegMap := Sysconfig["agency"].(map[string]interface{})
  469. preRegexps := buyRegMap["pre_regexp"].([]interface{})
  470. backRegexps := buyRegMap["back_regexp"].([]interface{})
  471. backRepRegexps := buyRegMap["back_rep_regexp"].([]interface{})
  472. backBlack := buyRegMap["blacklist"].([]interface{})
  473. var winPreRegexps []*regexp.Regexp
  474. for _, v := range preRegexps {
  475. reg := regexp.MustCompile("^" + v.(string))
  476. winPreRegexps = append(winPreRegexps, reg)
  477. }
  478. PreRegexp["agency"] = winPreRegexps
  479. var winBackRegexps []*regexp.Regexp
  480. for _, v := range backRegexps {
  481. reg := regexp.MustCompile(v.(string))
  482. winBackRegexps = append(winBackRegexps, reg)
  483. }
  484. BackRegexp["agency"] = winBackRegexps
  485. var winBackRepRegexps []RegexpInfo
  486. for _, v := range backRepRegexps {
  487. reps := strings.Split(v.(string), "#")
  488. if len(reps) > 1 {
  489. reg := RegexpInfo{
  490. regs: regexp.MustCompile(reps[0]),
  491. repstr: reps[1],
  492. }
  493. winBackRepRegexps = append(winBackRepRegexps, reg)
  494. }
  495. }
  496. BackRepRegexp["agency"] = winBackRepRegexps
  497. var winBlackRegexps []*regexp.Regexp
  498. for _, v := range backBlack {
  499. reg := regexp.MustCompile(v.(string))
  500. winBlackRegexps = append(winBlackRegexps, reg)
  501. }
  502. BlackRegexp["agency"] = winBlackRegexps
  503. }