init.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. package main
  2. import (
  3. "log"
  4. mu "mfw/util"
  5. "qfw/util"
  6. "qfw/util/mongodb"
  7. "regexp"
  8. "sync"
  9. "gopkg.in/mgo.v2/bson"
  10. )
  11. const (
  12. ProjectCache = "info" //存放每条项目信息,key为项目ID
  13. )
  14. var (
  15. Sysconfig map[string]interface{} //读取配置文件
  16. MongoTool mongodb.MongodbSim //mongodb连接
  17. ExtractColl, ProjectColl string //抽取表、项目表
  18. CurrentMegerTime int64 //当前合并到的信息的时间
  19. CurrentMegerCount int //当前合并计数
  20. MultiThread = make(chan bool, 5) //项目合并线程
  21. AllIdsMap2 = map[string]*ID{}
  22. AllIdsMapLock = sync.Mutex{}
  23. )
  24. var (
  25. _datereg = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
  26. _numreg1 = regexp.MustCompile("^[0-9-]{1,8}$")
  27. _zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
  28. _nzreg = regexp.MustCompile("^[0-9a-zA-Z-]+$")
  29. _hanreg = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
  30. replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
  31. pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
  32. nreg1 = regexp.MustCompile("[0-9]{2,}")
  33. zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
  34. hreg1 = regexp.MustCompile(`[\p{Han}]+`)
  35. numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
  36. //存放项目名称
  37. mapPn = map[string]*Key{}
  38. //存放项目编号
  39. mapPc = map[string]*Key{}
  40. //存放采购单位
  41. mapPb = map[string]*Key{}
  42. compareNoPass = map[string]bool{}
  43. compareAB = map[string]bool{}
  44. compareAB2D = map[string]bool{}
  45. compareABD = map[string]bool{}
  46. compareAB2CD = map[string]bool{}
  47. compareABCD = map[string]bool{}
  48. )
  49. func init() {
  50. util.ReadConfig(&Sysconfig)
  51. MultiThread = make(chan bool, util.IntAllDef(Sysconfig["thread"], 5))
  52. MongoTool = mongodb.MongodbSim{
  53. MongodbAddr: Sysconfig["mongodbServers"].(string),
  54. Size: util.IntAll(Sysconfig["mongodbPoolSize"]),
  55. DbName: Sysconfig["mongodbName"].(string),
  56. }
  57. MongoTool.InitPool()
  58. ExtractColl = Sysconfig["extractColl"].(string)
  59. ProjectColl = Sysconfig["projectColl"].(string)
  60. udpport, _ := Sysconfig["udpport"].(string)
  61. udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
  62. udpclient.Listen(processUdpMsg)
  63. log.Println("Udp服务监听", udpport)
  64. //---不能通过
  65. vm := []string{"C", "D"}
  66. for i := 0; i < 2; i++ {
  67. for j := 0; j < 2; j++ {
  68. for k := 0; k < 2; k++ {
  69. key := vm[i] + vm[j] + vm[k]
  70. compareNoPass[key] = true
  71. //fmt.Println(key)
  72. }
  73. }
  74. }
  75. //fmt.Println("-------------------")
  76. //三个元素一致 [AB][AB][AB],分值最高
  77. vm = []string{"A", "B"}
  78. for i := 0; i < 2; i++ {
  79. for j := 0; j < 2; j++ {
  80. for k := 0; k < 2; k++ {
  81. key := vm[i] + vm[j] + vm[k]
  82. compareAB[key] = true
  83. //fmt.Println(key)
  84. }
  85. }
  86. }
  87. //fmt.Println("-------------------", len(compareAB))
  88. //---至少两个一致,其他可能不存在
  89. //[AB][AB][ABD]
  90. //[AB][ABD][AB]
  91. vm = []string{"A", "B"}
  92. vm2 := []string{"A", "B", "D"}
  93. for i := 0; i < 2; i++ {
  94. for j := 0; j < 2; j++ {
  95. for k := 0; k < 3; k++ {
  96. key := vm[i] + vm[j] + vm2[k]
  97. if !compareAB[key] {
  98. compareAB2D[key] = true
  99. //fmt.Println(key)
  100. }
  101. }
  102. }
  103. }
  104. for i := 0; i < 2; i++ {
  105. for j := 0; j < 3; j++ {
  106. for k := 0; k < 2; k++ {
  107. key := vm[i] + vm2[j] + vm[k]
  108. if !compareAB[key] {
  109. compareAB2D[key] = true
  110. //fmt.Println(key)
  111. }
  112. }
  113. }
  114. }
  115. //fmt.Println("-------------------", len(compareAB2D))
  116. //---至少一个一致,其他可能不存在
  117. //[ABD][ABD][ABD] //已经删除DDD
  118. vm = []string{"A", "B", "D"}
  119. for i := 0; i < 3; i++ {
  120. for j := 0; j < 3; j++ {
  121. for k := 0; k < 3; k++ {
  122. key := vm[i] + vm[j] + vm[k]
  123. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  124. compareABD[key] = true
  125. //fmt.Println(key)
  126. }
  127. }
  128. }
  129. }
  130. //fmt.Println("-------------------", len(compareABD))
  131. //[AB][ABCD][AB]
  132. //[AB][AB][ABCD]
  133. vm = []string{"A", "B"}
  134. vm2 = []string{"A", "B", "C", "D"}
  135. for i := 0; i < 2; i++ {
  136. for j := 0; j < 4; j++ {
  137. for k := 0; k < 2; k++ {
  138. key := vm[i] + vm2[j] + vm[k]
  139. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  140. compareAB2CD[key] = true
  141. //fmt.Println(key)
  142. }
  143. }
  144. }
  145. }
  146. for i := 0; i < 2; i++ {
  147. for j := 0; j < 2; j++ {
  148. for k := 0; k < 4; k++ {
  149. key := vm[i] + vm[j] + vm2[k]
  150. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  151. compareAB2CD[key] = true
  152. //fmt.Println(key)
  153. }
  154. }
  155. }
  156. }
  157. //fmt.Println("-------------------", len(compareAB2CD))
  158. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  159. vm = []string{"A", "B", "C", "D"}
  160. for i := 0; i < 4; i++ {
  161. for j := 0; j < 4; j++ {
  162. for k := 0; k < 4; k++ {
  163. key := vm[i] + vm[j] + vm[k]
  164. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  165. compareABCD[key] = true
  166. //fmt.Println(key)
  167. }
  168. }
  169. }
  170. }
  171. }
  172. func CheckHanAndNum(str string) (b bool) {
  173. return nreg1.MatchString(str) && hreg1.MatchString(str)
  174. }
  175. func CheckZimuAndNum(str string) (b bool) {
  176. return zreg1.MatchString(str) && nreg1.MatchString(str)
  177. }
  178. type KeyMap struct {
  179. Lock sync.Mutex
  180. Map map[string]*Key
  181. }
  182. type ID struct {
  183. Id string
  184. Lock sync.Mutex
  185. lastTime int64
  186. pos int
  187. P *ProjectInfo
  188. }
  189. type Key struct {
  190. Arr []string
  191. Lock sync.Mutex
  192. }
  193. type IdAndLock struct {
  194. Id string
  195. Lock sync.Mutex
  196. }
  197. func NewKeyMap() *KeyMap {
  198. return &KeyMap{
  199. Map: map[string]*Key{},
  200. Lock: sync.Mutex{},
  201. }
  202. }
  203. //招标信息实体类
  204. type Info struct {
  205. Id string `json:"_id"`
  206. Href string `json:"href"` //源地址
  207. Publishtime int64 `json:"publishtime"`
  208. Title string `json:"title"`
  209. TopType string `json:"toptype"`
  210. SubType string `json:"subtype"`
  211. ProjectName string `json:"projectname"`
  212. ProjectCode string `json:"projectcode"`
  213. Buyer string `json:"buyer"`
  214. Buyerperson string `json:"buyerperson"`
  215. Buyertel string `json:"buyertel"`
  216. Agency string `json:"agency"`
  217. Area string `json:"area"`
  218. City string `json:"city"`
  219. District string `json:"district"`
  220. HasPackage bool `json:"haspackage"`
  221. Package map[string]interface{} `json:"package"`
  222. PNum string `json:"pnum"`
  223. Topscopeclass []string `json:"topscopeclass"`
  224. Subscopeclass []string `json:"subscopeclass"`
  225. Buyerclass string `json:"buyerclass"`
  226. Bidopentime int64 `json:"bidopentime"`
  227. Budget float64 `json:"budget"`
  228. Bidamount float64 `json:"bidamount"`
  229. Winners []string
  230. dealtype int
  231. Winnerorder []string
  232. PTC string //从标题中抽的项目编号
  233. pnbval int //项目名称、编号、采购单位存在的个数
  234. LenPC int //项目编号长度
  235. LenPN int //项目名称长度
  236. LenPTC int //标题抽的项目编号长度
  237. }
  238. //项目实体类
  239. type ProjectInfo struct {
  240. Id bson.ObjectId `bson:"_id"`
  241. FirstTime int64 `json:"firsttime"` //项目的最早时间
  242. LastTime int64 `json:"lasttime"` //项目的最后时间
  243. Ids []string `json:"ids"`
  244. Topscopeclass []string `json:"topscopeclass"`
  245. Subscopeclass []string `json:"subscopeclass"` //子行业分类
  246. Winners []string `json:"winners"` //中标人
  247. ProjectName string `json:"projectname"` //项目名称
  248. ProjectCode string `json:"projectcode"` //项目代码唯一(纯数字的权重低)
  249. Buyer string `json:"buyer"` //采购单位唯一
  250. MPN []string `json:"mpn"` //合并后多余的项目名称
  251. MPC []string `json:"mpc"` //合并后多余的项目编号
  252. Buyerperson string `json:"buyerperson"` //采购联系人
  253. Buyertel string `json:"buyertel"` //采购联系人电话
  254. Agency string `json:"agency"` //代理机构
  255. Area string `json:"area"` //地区
  256. City string `json:"city"` //地市
  257. District string `json:"district"` //区县
  258. HasPackage bool `json:"haspackage"` //是否有分包
  259. Package map[string]interface{} `json:"package"` //分包的对比对象
  260. Buyerclass string `json:"buyerclass"` //采购单位分类
  261. Bidopentime int64 `json:"bidopentime"` //开标时间
  262. Zbtime int64 `json:"zbtime"` //招标时间
  263. Jgtime int64 `json:"jgtime"` //结果中标时间
  264. Bidamount float64 `json:"bidamount"` //中标金额
  265. Budget float64 `json:"budget"` //预算
  266. Winnerorder []string `json:"winnerorder"` //中标候选人
  267. score int
  268. comStr string
  269. }