init.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. package main
  2. import (
  3. "log"
  4. mu "mfw/util"
  5. "qfw/util"
  6. "qfw/util/mongodb"
  7. "qfw/util/redis"
  8. "regexp"
  9. "sync"
  10. "gopkg.in/mgo.v2/bson"
  11. )
  12. const (
  13. ProjectCache = "info" //存放每条项目信息,key为项目ID
  14. )
  15. var (
  16. Sysconfig map[string]interface{} //读取配置文件
  17. MongoTool mongodb.MongodbSim //mongodb连接
  18. ExtractColl, ProjectColl string //抽取表、项目表
  19. CurrentMegerTime int64 //当前合并到的信息的时间
  20. CurrentMegerCount int //当前合并计数
  21. MultiThread = make(chan bool, 5) //项目合并线程
  22. AllIdsMap2 = map[string]*ID{}
  23. AllIdsMapLock = sync.Mutex{}
  24. )
  25. var (
  26. _datereg = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
  27. _numreg1 = regexp.MustCompile("^[0-9-]{1,8}$")
  28. _zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
  29. _nzreg = regexp.MustCompile("^[0-9a-zA-Z-]+$")
  30. _hanreg = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
  31. replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
  32. pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
  33. nreg1 = regexp.MustCompile("[0-9]{2,}")
  34. zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
  35. hreg1 = regexp.MustCompile(`[\p{Han}]+`)
  36. numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
  37. //存放项目名称
  38. mapPn = map[string]*Key{}
  39. //存放项目编号
  40. mapPc = map[string]*Key{}
  41. //存放采购单位
  42. mapPb = map[string]*Key{}
  43. compareNoPass = map[string]bool{}
  44. compareAB = map[string]bool{}
  45. compareAB2D = map[string]bool{}
  46. compareABD = map[string]bool{}
  47. compareAB2CD = map[string]bool{}
  48. compareABCD = map[string]bool{}
  49. )
  50. func init() {
  51. util.ReadConfig(&Sysconfig)
  52. MultiThread = make(chan bool, util.IntAllDef(Sysconfig["thread"], 5))
  53. redis.InitRedisBySize(Sysconfig["redisaddrs"].(string), util.IntAllDef(Sysconfig["redisPoolSize"], 60), 10, 300)
  54. MongoTool = mongodb.MongodbSim{
  55. MongodbAddr: Sysconfig["mongodbServers"].(string),
  56. Size: util.IntAll(Sysconfig["mongodbPoolSize"]),
  57. DbName: Sysconfig["mongodbName"].(string),
  58. }
  59. MongoTool.InitPool()
  60. ExtractColl = Sysconfig["extractColl"].(string)
  61. ProjectColl = Sysconfig["projectColl"].(string)
  62. udpport, _ := Sysconfig["udpport"].(string)
  63. udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
  64. udpclient.Listen(processUdpMsg)
  65. log.Println("Udp服务监听", udpport)
  66. //---不能通过
  67. vm := []string{"C", "D"}
  68. for i := 0; i < 2; i++ {
  69. for j := 0; j < 2; j++ {
  70. for k := 0; k < 2; k++ {
  71. key := vm[i] + vm[j] + vm[k]
  72. compareNoPass[key] = true
  73. //fmt.Println(key)
  74. }
  75. }
  76. }
  77. //fmt.Println("-------------------")
  78. //三个元素一致 [AB][AB][AB],分值最高
  79. vm = []string{"A", "B"}
  80. for i := 0; i < 2; i++ {
  81. for j := 0; j < 2; j++ {
  82. for k := 0; k < 2; k++ {
  83. key := vm[i] + vm[j] + vm[k]
  84. compareAB[key] = true
  85. //fmt.Println(key)
  86. }
  87. }
  88. }
  89. //fmt.Println("-------------------", len(compareAB))
  90. //---至少两个一致,其他可能不存在
  91. //[AB][AB][ABD]
  92. //[AB][ABD][AB]
  93. vm = []string{"A", "B"}
  94. vm2 := []string{"A", "B", "D"}
  95. for i := 0; i < 2; i++ {
  96. for j := 0; j < 2; j++ {
  97. for k := 0; k < 3; k++ {
  98. key := vm[i] + vm[j] + vm2[k]
  99. if !compareAB[key] {
  100. compareAB2D[key] = true
  101. //fmt.Println(key)
  102. }
  103. }
  104. }
  105. }
  106. for i := 0; i < 2; i++ {
  107. for j := 0; j < 3; j++ {
  108. for k := 0; k < 2; k++ {
  109. key := vm[i] + vm2[j] + vm[k]
  110. if !compareAB[key] {
  111. compareAB2D[key] = true
  112. //fmt.Println(key)
  113. }
  114. }
  115. }
  116. }
  117. //fmt.Println("-------------------", len(compareAB2D))
  118. //---至少一个一致,其他可能不存在
  119. //[ABD][ABD][ABD] //已经删除DDD
  120. vm = []string{"A", "B", "D"}
  121. for i := 0; i < 3; i++ {
  122. for j := 0; j < 3; j++ {
  123. for k := 0; k < 3; k++ {
  124. key := vm[i] + vm[j] + vm[k]
  125. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  126. compareABD[key] = true
  127. //fmt.Println(key)
  128. }
  129. }
  130. }
  131. }
  132. //fmt.Println("-------------------", len(compareABD))
  133. //[AB][ABCD][AB]
  134. //[AB][AB][ABCD]
  135. vm = []string{"A", "B"}
  136. vm2 = []string{"A", "B", "C", "D"}
  137. for i := 0; i < 2; i++ {
  138. for j := 0; j < 4; j++ {
  139. for k := 0; k < 2; k++ {
  140. key := vm[i] + vm2[j] + vm[k]
  141. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  142. compareAB2CD[key] = true
  143. //fmt.Println(key)
  144. }
  145. }
  146. }
  147. }
  148. for i := 0; i < 2; i++ {
  149. for j := 0; j < 2; j++ {
  150. for k := 0; k < 4; k++ {
  151. key := vm[i] + vm[j] + vm2[k]
  152. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  153. compareAB2CD[key] = true
  154. //fmt.Println(key)
  155. }
  156. }
  157. }
  158. }
  159. //fmt.Println("-------------------", len(compareAB2CD))
  160. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  161. vm = []string{"A", "B", "C", "D"}
  162. for i := 0; i < 4; i++ {
  163. for j := 0; j < 4; j++ {
  164. for k := 0; k < 4; k++ {
  165. key := vm[i] + vm[j] + vm[k]
  166. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  167. compareABCD[key] = true
  168. //fmt.Println(key)
  169. }
  170. }
  171. }
  172. }
  173. }
  174. func CheckHanAndNum(str string) (b bool) {
  175. return nreg1.MatchString(str) && hreg1.MatchString(str)
  176. }
  177. func CheckZimuAndNum(str string) (b bool) {
  178. return zreg1.MatchString(str) && nreg1.MatchString(str)
  179. }
  180. type KeyMap struct {
  181. Lock sync.Mutex
  182. Map map[string]*Key
  183. }
  184. type ID struct {
  185. Id string
  186. Lock sync.Mutex
  187. lastTime int64
  188. pos int
  189. }
  190. type Key struct {
  191. Arr []string
  192. Lock sync.Mutex
  193. }
  194. type IdAndLock struct {
  195. Id string
  196. Lock sync.Mutex
  197. }
  198. func NewKeyMap() *KeyMap {
  199. return &KeyMap{
  200. Map: map[string]*Key{},
  201. Lock: sync.Mutex{},
  202. }
  203. }
  204. //招标信息实体类
  205. type Info struct {
  206. Id string `json:"_id"`
  207. Href string `json:"href"` //源地址
  208. Publishtime int64 `json:"publishtime"`
  209. Title string `json:"title"`
  210. TopType string `json:"toptype"`
  211. SubType string `json:"subtype"`
  212. ProjectName string `json:"projectname"`
  213. ProjectCode string `json:"projectcode"`
  214. Buyer string `json:"buyer"`
  215. Buyerperson string `json:"buyerperson"`
  216. Buyertel string `json:"buyertel"`
  217. Agency string `json:"agency"`
  218. Area string `json:"area"`
  219. City string `json:"city"`
  220. District string `json:"district"`
  221. HasPackage bool `json:"haspackage"`
  222. Package map[string]interface{} `json:"package"`
  223. PNum string `json:"pnum"`
  224. Topscopeclass []string `json:"topscopeclass"`
  225. Subscopeclass []string `json:"subscopeclass"`
  226. Buyerclass string `json:"buyerclass"`
  227. Bidopentime int64 `json:"bidopentime"`
  228. budget float64 `json:"budget"`
  229. bidamount float64 `json:"bidamount"`
  230. Winners []string
  231. dealtype int
  232. Winnerorder []string
  233. PTC string //从标题中抽的项目编号
  234. pnbval int //项目名称、编号、采购单位存在的个数
  235. LenPC int //项目编号长度
  236. LenPN int //项目名称长度
  237. LenPTC int //标题抽的项目编号长度
  238. }
  239. //项目实体类
  240. type ProjectInfo struct {
  241. Id bson.ObjectId `bson:"_id"`
  242. FirstTime int64 `json:"firsttime"` //项目的最早时间
  243. LastTime int64 `json:"lasttime"` //项目的最后时间
  244. Ids []string `json:"ids"`
  245. Topscopeclass []string `json:"topscopeclass"`
  246. Subscopeclass []string `json:"subscopeclass"` //子行业分类
  247. Winners []string `json:"winners"` //中标人
  248. ProjectName string `json:"projectname"` //项目名称
  249. ProjectCode string `json:"projectcode"` //项目代码唯一(纯数字的权重低)
  250. Buyer string `json:"buyer"` //采购单位唯一
  251. MPN []string `json:"mpn"` //合并后多余的项目名称
  252. MPC []string `json:"mpc"` //合并后多余的项目编号
  253. Buyerperson string `json:"buyerperson"` //采购联系人
  254. Buyertel string `json:"buyertel"` //采购联系人电话
  255. Agency string `json:"agency"` //代理机构
  256. Area string `json:"area"` //地区
  257. City string `json:"city"` //地市
  258. District string `json:"district"` //区县
  259. HasPackage bool `json:"haspackage"` //是否有分包
  260. Package map[string]interface{} `json:"package"` //分包的对比对象
  261. Buyerclass string `json:"buyerclass"` //采购单位分类
  262. Bidopentime int64 `json:"bidopentime"` //开标时间
  263. Zbtime int64 `json:"zbtime"` //招标时间
  264. Jgtime int64 `json:"jgtime"` //结果中标时间
  265. Bidamount float64 `json:"bidamount"` //中标金额
  266. Budget float64 `json:"budget"` //预算
  267. Winnerorder []string `json:"winnerorder"` //中标候选人
  268. score int
  269. comStr string
  270. }