init.go 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. package main
  2. import (
  3. "log"
  4. mu "mfw/util"
  5. "qfw/util"
  6. "qfw/util/mongodb"
  7. "regexp"
  8. "sync"
  9. "gopkg.in/mgo.v2/bson"
  10. )
  11. const (
  12. ProjectCache = "info" //存放每条项目信息,key为项目ID
  13. )
  14. var (
  15. Sysconfig map[string]interface{} //读取配置文件
  16. MongoTool mongodb.MongodbSim //mongodb连接
  17. ExtractColl, ProjectColl string //抽取表、项目表
  18. CurrentMegerTime int64 //当前合并到的信息的时间
  19. CurrentMegerCount int //当前合并计数
  20. MultiThread = make(chan bool, 5) //项目合并线程
  21. AllIdsMap2 = map[string]*ID{}
  22. AllIdsMapLock = sync.Mutex{}
  23. )
  24. var (
  25. //判断是日期
  26. _datereg = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
  27. _numreg1 = regexp.MustCompile("^[0-9-]{1,8}$")
  28. _zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
  29. _nzreg = regexp.MustCompile("^[0-9a-zA-Z-]+$")
  30. _hanreg = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
  31. replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
  32. //判断带有分包、等特定词的
  33. pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
  34. //判断包含数值
  35. nreg1 = regexp.MustCompile("[0-9]{2,}")
  36. //判断包含字母
  37. zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
  38. //判断包含汉字
  39. hreg1 = regexp.MustCompile(`[\p{Han}]+`)
  40. //判断项目编号是在10以内的纯数字结构
  41. numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
  42. //存放项目名称
  43. mapPn = map[string]*Key{}
  44. //存放项目编号
  45. mapPc = map[string]*Key{}
  46. //存放采购单位
  47. mapPb = map[string]*Key{}
  48. //仅初始化使用
  49. compareNoPass = map[string]bool{}
  50. compareAB = map[string]bool{}
  51. compareAB2D = map[string]bool{}
  52. compareABD = map[string]bool{}
  53. compareAB2CD = map[string]bool{}
  54. compareABCD = map[string]bool{}
  55. )
  56. func init() {
  57. util.ReadConfig(&Sysconfig)
  58. MultiThread = make(chan bool, util.IntAllDef(Sysconfig["thread"], 5))
  59. MongoTool = mongodb.MongodbSim{
  60. MongodbAddr: Sysconfig["mongodbServers"].(string),
  61. Size: util.IntAll(Sysconfig["mongodbPoolSize"]),
  62. DbName: Sysconfig["mongodbName"].(string),
  63. }
  64. MongoTool.InitPool()
  65. ExtractColl = Sysconfig["extractColl"].(string)
  66. ProjectColl = Sysconfig["projectColl"].(string)
  67. udpport, _ := Sysconfig["udpport"].(string)
  68. udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
  69. udpclient.Listen(processUdpMsg)
  70. log.Println("Udp服务监听", udpport)
  71. //---不能通过
  72. vm := []string{"C", "D"}
  73. for i := 0; i < 2; i++ {
  74. for j := 0; j < 2; j++ {
  75. for k := 0; k < 2; k++ {
  76. key := vm[i] + vm[j] + vm[k]
  77. compareNoPass[key] = true
  78. //fmt.Println(key)
  79. }
  80. }
  81. }
  82. //fmt.Println("-------------------")
  83. //三个元素一致 [AB][AB][AB],分值最高
  84. vm = []string{"A", "B"}
  85. for i := 0; i < 2; i++ {
  86. for j := 0; j < 2; j++ {
  87. for k := 0; k < 2; k++ {
  88. key := vm[i] + vm[j] + vm[k]
  89. compareAB[key] = true
  90. //fmt.Println(key)
  91. }
  92. }
  93. }
  94. //fmt.Println("-------------------", len(compareAB))
  95. //---至少两个一致,其他可能不存在
  96. //[AB][AB][ABD]
  97. //[AB][ABD][AB]
  98. vm = []string{"A", "B"}
  99. vm2 := []string{"A", "B", "D"}
  100. for i := 0; i < 2; i++ {
  101. for j := 0; j < 2; j++ {
  102. for k := 0; k < 3; k++ {
  103. key := vm[i] + vm[j] + vm2[k]
  104. if !compareAB[key] {
  105. compareAB2D[key] = true
  106. //fmt.Println(key)
  107. }
  108. }
  109. }
  110. }
  111. for i := 0; i < 2; i++ {
  112. for j := 0; j < 3; j++ {
  113. for k := 0; k < 2; k++ {
  114. key := vm[i] + vm2[j] + vm[k]
  115. if !compareAB[key] {
  116. compareAB2D[key] = true
  117. //fmt.Println(key)
  118. }
  119. }
  120. }
  121. }
  122. //fmt.Println("-------------------", len(compareAB2D))
  123. //---至少一个一致,其他可能不存在
  124. //[ABD][ABD][ABD] //已经删除DDD
  125. vm = []string{"A", "B", "D"}
  126. for i := 0; i < 3; i++ {
  127. for j := 0; j < 3; j++ {
  128. for k := 0; k < 3; k++ {
  129. key := vm[i] + vm[j] + vm[k]
  130. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  131. compareABD[key] = true
  132. //fmt.Println(key)
  133. }
  134. }
  135. }
  136. }
  137. //fmt.Println("-------------------", len(compareABD))
  138. //[AB][ABCD][AB]
  139. //[AB][AB][ABCD]
  140. vm = []string{"A", "B"}
  141. vm2 = []string{"A", "B", "C", "D"}
  142. for i := 0; i < 2; i++ {
  143. for j := 0; j < 4; j++ {
  144. for k := 0; k < 2; k++ {
  145. key := vm[i] + vm2[j] + vm[k]
  146. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  147. compareAB2CD[key] = true
  148. //fmt.Println(key)
  149. }
  150. }
  151. }
  152. }
  153. for i := 0; i < 2; i++ {
  154. for j := 0; j < 2; j++ {
  155. for k := 0; k < 4; k++ {
  156. key := vm[i] + vm[j] + vm2[k]
  157. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  158. compareAB2CD[key] = true
  159. //fmt.Println(key)
  160. }
  161. }
  162. }
  163. }
  164. //fmt.Println("-------------------", len(compareAB2CD))
  165. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  166. vm = []string{"A", "B", "C", "D"}
  167. for i := 0; i < 4; i++ {
  168. for j := 0; j < 4; j++ {
  169. for k := 0; k < 4; k++ {
  170. key := vm[i] + vm[j] + vm[k]
  171. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  172. compareABCD[key] = true
  173. //fmt.Println(key)
  174. }
  175. }
  176. }
  177. }
  178. }
  179. func CheckHanAndNum(str string) (b bool) {
  180. return nreg1.MatchString(str) && hreg1.MatchString(str)
  181. }
  182. func CheckZimuAndNum(str string) (b bool) {
  183. return zreg1.MatchString(str) && nreg1.MatchString(str)
  184. }
  185. type KeyMap struct {
  186. Lock sync.Mutex
  187. Map map[string]*Key
  188. }
  189. type ID struct {
  190. Id string
  191. Lock sync.Mutex
  192. lastTime int64
  193. pos int
  194. P *ProjectInfo
  195. }
  196. type Key struct {
  197. Arr []string
  198. Lock sync.Mutex
  199. }
  200. type IdAndLock struct {
  201. Id string
  202. Lock sync.Mutex
  203. }
  204. func NewKeyMap() *KeyMap {
  205. return &KeyMap{
  206. Map: map[string]*Key{},
  207. Lock: sync.Mutex{},
  208. }
  209. }
  210. //招标信息实体类
  211. type Info struct {
  212. Id string `json:"_id"`
  213. Href string `json:"href"` //源地址
  214. Publishtime int64 `json:"publishtime"`
  215. Title string `json:"title"`
  216. TopType string `json:"toptype"`
  217. SubType string `json:"subtype"`
  218. ProjectName string `json:"projectname"`
  219. ProjectCode string `json:"projectcode"`
  220. Buyer string `json:"buyer"`
  221. Buyerperson string `json:"buyerperson"`
  222. Buyertel string `json:"buyertel"`
  223. Agency string `json:"agency"`
  224. Area string `json:"area"`
  225. City string `json:"city"`
  226. District string `json:"district"`
  227. HasPackage bool `json:"haspackage"`
  228. Package map[string]interface{} `json:"package"`
  229. PNum string `json:"pnum"`
  230. Topscopeclass []string `json:"topscopeclass"`
  231. Subscopeclass []string `json:"subscopeclass"`
  232. Buyerclass string `json:"buyerclass"`
  233. Bidopentime int64 `json:"bidopentime"`
  234. Budget float64 `json:"budget"`
  235. Bidamount float64 `json:"bidamount"`
  236. Winners []string
  237. dealtype int
  238. Winnerorder []string
  239. PTC string //从标题中抽的项目编号
  240. pnbval int //项目名称、编号、采购单位存在的个数
  241. LenPC int //项目编号长度
  242. LenPN int //项目名称长度
  243. LenPTC int //标题抽的项目编号长度
  244. //以下三个元素做对比,计算包含时候使用
  245. PNBH int //0初始,+包含,-被包含
  246. PCBH int
  247. PTCBH int
  248. }
  249. //项目实体类
  250. type ProjectInfo struct {
  251. Id bson.ObjectId `bson:"_id"`
  252. FirstTime int64 `json:"firsttime"` //项目的最早时间
  253. LastTime int64 `json:"lasttime"` //项目的最后时间
  254. Ids []string `json:"ids"`
  255. Topscopeclass []string `json:"topscopeclass"`
  256. Subscopeclass []string `json:"subscopeclass"` //子行业分类
  257. Winners []string `json:"winners"` //中标人
  258. ProjectName string `json:"projectname"` //项目名称
  259. ProjectCode string `json:"projectcode"` //项目代码唯一(纯数字的权重低)
  260. Buyer string `json:"buyer"` //采购单位唯一
  261. MPN []string `json:"mpn"` //合并后多余的项目名称
  262. MPC []string `json:"mpc"` //合并后多余的项目编号
  263. Buyerperson string `json:"buyerperson"` //采购联系人
  264. Buyertel string `json:"buyertel"` //采购联系人电话
  265. Agency string `json:"agency"` //代理机构
  266. Area string `json:"area"` //地区
  267. City string `json:"city"` //地市
  268. District string `json:"district"` //区县
  269. HasPackage bool `json:"haspackage"` //是否有分包
  270. Package map[string]interface{} `json:"package"` //分包的对比对象
  271. Buyerclass string `json:"buyerclass"` //采购单位分类
  272. Bidopentime int64 `json:"bidopentime"` //开标时间
  273. Zbtime int64 `json:"zbtime"` //招标时间
  274. Jgtime int64 `json:"jgtime"` //结果中标时间
  275. Bidamount float64 `json:"bidamount"` //中标金额
  276. Budget float64 `json:"budget"` //预算
  277. Winnerorder []string `json:"winnerorder"` //中标候选人
  278. score int
  279. comStr string
  280. }