init.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. package main
  2. import (
  3. "log"
  4. "math"
  5. mu "mfw/util"
  6. "qfw/util"
  7. "regexp"
  8. "sort"
  9. "strings"
  10. "sync"
  11. "go.mongodb.org/mongo-driver/bson/primitive"
  12. )
  13. const (
  14. ProjectCache = "info" //存放每条项目信息,key为项目ID
  15. )
  16. var (
  17. Sysconfig map[string]interface{} //读取配置文件
  18. MongoTool *MongodbSim //mongodb连接
  19. ExtractColl, ProjectColl, BackupColl, SiteColl string //抽取表、项目表、项目快照表、站点表
  20. UpdateColl string // 金额修改数据表
  21. Thread int //配置项线程数
  22. //NextNode []interface{}
  23. )
  24. var (
  25. //判断是日期
  26. _datereg = regexp.MustCompile("20[0-2][0-9][年-][0-9]{1,2}[月-][0-9]{1,2}[日-]([0-9]{1,2}时[0-9]{0,2})?")
  27. _numreg1 = regexp.MustCompile("^[0-9-]{1,8}$")
  28. _zimureg1 = regexp.MustCompile("^[a-zA-Z-]{1,7}$")
  29. _nzreg = regexp.MustCompile("^[0-9a-zA-Z-]+$")
  30. _hanreg = regexp.MustCompile(`^[\p{Han}::【】\\[\\]()()--、]+$`)
  31. replaceStr = regexp.MustCompile("(工程|采购|项目|[?!、【】()—()--]|栏标价|中标候选人|招标代理)")
  32. //判断带有分包、等特定词的
  33. pStr = regexp.MustCompile("(勘察|监理|施工|设计|验收|标段|分包|子包|[0-9A-Z]包|[一二三四五六七八九十0-9]批)")
  34. //判断包含数值
  35. nreg1 = regexp.MustCompile("[0-9]{2,}")
  36. //判断包含字母
  37. zreg1 = regexp.MustCompile("[a-zA-Z]{1,}")
  38. //判断包含汉字
  39. hreg1 = regexp.MustCompile(`[\p{Han}]+`)
  40. //判断项目编号是在10以内的纯数字结构
  41. numCheckPc = regexp.MustCompile("^[0-9-]{1,10}$")
  42. //仅初始化使用
  43. compareNoPass = map[string]bool{}
  44. compareAB = map[string]bool{}
  45. compareAB2D = map[string]bool{}
  46. compareABD = map[string]bool{}
  47. compareAB2CD = map[string]bool{}
  48. compareABCD = map[string]bool{}
  49. )
  50. func init() {
  51. util.ReadConfig(&Sysconfig)
  52. MongoTool = &MongodbSim{
  53. MongodbAddr: Sysconfig["mongodbServers"].(string),
  54. Size: util.IntAll(Sysconfig["mongodbPoolSize"]),
  55. DbName: Sysconfig["mongodbName"].(string),
  56. }
  57. MongoTool.InitPool()
  58. ExtractColl = Sysconfig["extractColl"].(string)
  59. ProjectColl = Sysconfig["projectColl"].(string)
  60. UpdateColl = Sysconfig["updateColl"].(string)
  61. BackupColl = Sysconfig["projectColl"].(string) + "_back"
  62. SiteColl = Sysconfig["siteColl"].(string)
  63. Thread = util.IntAll(Sysconfig["thread"])
  64. //NextNode = Sysconfig["nextNode"].([]interface{})
  65. udpport, _ := Sysconfig["udpport"].(string)
  66. udpclient = mu.UdpClient{Local: udpport, BufSize: 1024}
  67. udpclient.Listen(processUdpMsg)
  68. log.Println("Udp服务监听", udpport)
  69. //加载项目数据
  70. //---不能通过
  71. vm := []string{"C", "D"}
  72. for i := 0; i < 2; i++ {
  73. for j := 0; j < 2; j++ {
  74. for k := 0; k < 2; k++ {
  75. key := vm[i] + vm[j] + vm[k]
  76. compareNoPass[key] = true
  77. //fmt.Println(key)
  78. }
  79. }
  80. }
  81. //fmt.Println("-------------------")
  82. //三个元素一致 [AB][AB][AB],分值最高
  83. vm = []string{"A", "B"}
  84. for i := 0; i < 2; i++ {
  85. for j := 0; j < 2; j++ {
  86. for k := 0; k < 2; k++ {
  87. key := vm[i] + vm[j] + vm[k]
  88. compareAB[key] = true
  89. //fmt.Println(key)
  90. }
  91. }
  92. }
  93. //fmt.Println("-------------------", len(compareAB))
  94. //---至少两个一致,其他可能不存在
  95. //[AB][AB][ABD]
  96. //[AB][ABD][AB]
  97. vm = []string{"A", "B"}
  98. vm2 := []string{"A", "B", "D"}
  99. for i := 0; i < 2; i++ {
  100. for j := 0; j < 2; j++ {
  101. for k := 0; k < 3; k++ {
  102. key := vm[i] + vm[j] + vm2[k]
  103. if !compareAB[key] {
  104. compareAB2D[key] = true
  105. //fmt.Println(key)
  106. }
  107. }
  108. }
  109. }
  110. for i := 0; i < 2; i++ {
  111. for j := 0; j < 3; j++ {
  112. for k := 0; k < 2; k++ {
  113. key := vm[i] + vm2[j] + vm[k]
  114. if !compareAB[key] {
  115. compareAB2D[key] = true
  116. //fmt.Println(key)
  117. }
  118. }
  119. }
  120. }
  121. //fmt.Println("-------------------", len(compareAB2D))
  122. //---至少一个一致,其他可能不存在
  123. //[ABD][ABD][ABD] //已经删除DDD
  124. vm = []string{"A", "B", "D"}
  125. for i := 0; i < 3; i++ {
  126. for j := 0; j < 3; j++ {
  127. for k := 0; k < 3; k++ {
  128. key := vm[i] + vm[j] + vm[k]
  129. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  130. compareABD[key] = true
  131. //fmt.Println(key)
  132. }
  133. }
  134. }
  135. }
  136. //fmt.Println("-------------------", len(compareABD))
  137. //[AB][ABCD][AB]
  138. //[AB][AB][ABCD]
  139. vm = []string{"A", "B"}
  140. vm2 = []string{"A", "B", "C", "D"}
  141. for i := 0; i < 2; i++ {
  142. for j := 0; j < 4; j++ {
  143. for k := 0; k < 2; k++ {
  144. key := vm[i] + vm2[j] + vm[k]
  145. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  146. compareAB2CD[key] = true
  147. //fmt.Println(key)
  148. }
  149. }
  150. }
  151. }
  152. for i := 0; i < 2; i++ {
  153. for j := 0; j < 2; j++ {
  154. for k := 0; k < 4; k++ {
  155. key := vm[i] + vm[j] + vm2[k]
  156. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  157. compareAB2CD[key] = true
  158. //fmt.Println(key)
  159. }
  160. }
  161. }
  162. }
  163. //fmt.Println("-------------------", len(compareAB2CD))
  164. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  165. vm = []string{"A", "B", "C", "D"}
  166. for i := 0; i < 4; i++ {
  167. for j := 0; j < 4; j++ {
  168. for k := 0; k < 4; k++ {
  169. key := vm[i] + vm[j] + vm[k]
  170. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  171. compareABCD[key] = true
  172. //fmt.Println(key)
  173. }
  174. }
  175. }
  176. }
  177. }
  178. func CheckHanAndNum(str string) (b bool) {
  179. return nreg1.MatchString(str) && hreg1.MatchString(str)
  180. }
  181. func CheckZimuAndNum(str string) (b bool) {
  182. return zreg1.MatchString(str) && nreg1.MatchString(str)
  183. }
  184. type KeyMap struct {
  185. Lock sync.Mutex
  186. Map map[string]*Key
  187. }
  188. type ID struct {
  189. Id string
  190. Lock sync.Mutex
  191. P *ProjectInfo
  192. }
  193. type Key struct {
  194. Arr []string
  195. Lock sync.Mutex
  196. }
  197. type IdAndLock struct {
  198. Id string
  199. Lock sync.Mutex
  200. }
  201. func NewKeyMap() *KeyMap {
  202. return &KeyMap{
  203. Map: map[string]*Key{},
  204. Lock: sync.Mutex{},
  205. }
  206. }
  207. //招标信息实体类
  208. type Info struct {
  209. Id string `json:"_id"`
  210. Href string `json:"href"` //源地址
  211. Publishtime int64 `json:"publishtime"`
  212. Comeintime int64 `json:"comeintime"`
  213. Title string `json:"title"`
  214. TopType string `json:"toptype"`
  215. SubType string `json:"subtype"`
  216. ProjectName string `json:"projectname"`
  217. ProjectCode string `json:"projectcode"`
  218. ProjectScope string `json:"projectscope"`
  219. ContractCode string `json:"contractcode"`
  220. Buyer string `json:"buyer"`
  221. Buyerperson string `json:"buyerperson"`
  222. Buyertel string `json:"buyertel"`
  223. Agency string `json:"agency"`
  224. Area string `json:"area"`
  225. City string `json:"city"`
  226. District string `json:"district"`
  227. Infoformat int `json:"infoformat"`
  228. ReviewExperts []string `json:"review_experts"`
  229. Purchasing string `json:"purchasing"`
  230. WinnerOrder []map[string]interface{} `json:"winnerorder"`
  231. HasPackage bool // `json:"haspackage"`
  232. Package map[string]interface{} `json:"package"`
  233. //PNum string `json:"pnum"`
  234. Topscopeclass []string `json:"topscopeclass"`
  235. Subscopeclass []string `json:"subscopeclass"`
  236. Buyerclass string `json:"buyerclass"`
  237. Bidopentime int64 `json:"bidopentime"`
  238. Budget float64 `json:"budget"`
  239. Bidamount float64 `json:"bidamount"`
  240. Winners []string
  241. dealtype int
  242. PTC string //从标题中抽的项目编号
  243. pnbval int //项目名称、编号、采购单位存在的个数
  244. LenPC int //项目编号长度
  245. LenPN int //项目名称长度
  246. LenPTC int //标题抽的项目编号长度
  247. //以下三个元素做对比,计算包含时候使用
  248. PNBH int //0初始,+包含,-被包含
  249. PCBH int
  250. PTCBH int
  251. }
  252. //项目实体类
  253. type ProjectInfo struct {
  254. Id primitive.ObjectID `json:"_id"`
  255. FirstTime int64 `json:"firsttime,omitempty"` //项目的最早时间
  256. LastTime int64 `json:"lasttime,omitempty"` //项目的最后时间
  257. Ids []string `json:"ids,omitempty"`
  258. Topscopeclass []string `json:"topscopeclass,omitempty"`
  259. Subscopeclass []string `json:"subscopeclass,omitempty"` //子行业分类
  260. Winners []string `json:"s_winner,omitempty"` //中标人
  261. ProjectName string `json:"projectname,omitempty"` //项目名称
  262. ProjectCode string `json:"projectcode,omitempty"` //项目代码唯一(纯数字的权重低)
  263. ContractCode string `json:"contractcode,omitempty"` //项目编号
  264. Buyer string `json:"buyer,omitempty"` //采购单位唯一
  265. MPN []string `json:"mpn,omitempty"` //合并后多余的项目名称
  266. MPC []string `json:"mpc,omitempty"` //合并后多余的项目编号
  267. Buyerperson string `json:"buyerperson"` //采购联系人
  268. Buyertel string `json:"buyertel"` //采购联系人电话
  269. Agency string `json:"agency"` //代理机构
  270. Area string `json:"area"` //地区
  271. City string `json:"city"` //地市
  272. District string `json:"district"` //区县
  273. Bidstatus string `json:"bidstatus"` //
  274. Bidtype string `json:"bidtype"` //
  275. ReviewExperts []string `json:"review_experts"` // 项目评审专家
  276. Purchasing string `json:"purchasing"` // 标的物
  277. //HasPackage bool `json:"haspackage"` //是否有分包
  278. Package map[string]interface{} `json:"package,omitempty"` //分包的对比对象
  279. Buyerclass string `json:"buyerclass"` //采购单位分类
  280. Bidopentime int64 `json:"bidopentime,omitempty"` //开标时间
  281. // Zbtime int64 `json:"zbtime"` //招标时间
  282. Jgtime int64 `json:"jgtime"` //结果中标时间
  283. Zbtime int64 `json:"zbtime"` //招标时间
  284. Bidamount float64 `json:"bidamount,omitempty"` //中标金额
  285. Budget float64 `json:"budget,omitempty"` //预算
  286. Winnerorder []string `json:"winnerorder"` //中标候选人
  287. score int
  288. comStr string
  289. resVal, pjVal int
  290. InfoFiled map[string]InfoField `json:"infofield"` //逻辑处理需要的info字段
  291. Budgettag int `json:"budgettag"` //预算是否有效标记
  292. Bidamounttag int `json:"bidamounttag"` //中标金额是否有效标记
  293. }
  294. //存储部分招标信息字段,业务逻辑处理需要
  295. type InfoField struct {
  296. Budget float64 `json:"budget"`
  297. Bidamount float64 `json:"bidamount"`
  298. ContractCode string `json:"contractcode"`
  299. ProjectName string `json:"projectname"`
  300. ProjectCode string `json:"projectcode"`
  301. Bidstatus string `json:"bidstatus"`
  302. }
  303. //站点信息
  304. type Site struct {
  305. Id string `json:"_id"`
  306. Site string `json:"site"` //站点名字
  307. Area string `json:"area"` //省
  308. City string `json:"city"` //市
  309. District string `json:"district"` //区、县
  310. Domain string `json:"domain"` //地址
  311. Status int `json:"status"` //
  312. }
  313. //二分字符串查找
  314. func BinarySearch(s []string, k string) int {
  315. sort.Strings(s)
  316. lo, hi := 0, len(s)-1
  317. for lo <= hi {
  318. m := (lo + hi) >> 1
  319. if s[m] < k {
  320. lo = m + 1
  321. } else if s[m] > k {
  322. hi = m - 1
  323. } else {
  324. return m
  325. }
  326. }
  327. return -1
  328. }
  329. //计算文本相似度
  330. func CosineSimilar(srcWords1, dstWords1 string) float64 {
  331. srcWords, dstWords := strings.Split(srcWords1, ""), strings.Split(dstWords1, "")
  332. // get all words
  333. allWordsMap := make(map[string]int, 0)
  334. for _, word := range srcWords {
  335. if _, found := allWordsMap[word]; !found {
  336. allWordsMap[word] = 1
  337. } else {
  338. allWordsMap[word] += 1
  339. }
  340. }
  341. for _, word := range dstWords {
  342. if _, found := allWordsMap[word]; !found {
  343. allWordsMap[word] = 1
  344. } else {
  345. allWordsMap[word] += 1
  346. }
  347. }
  348. // stable the sort
  349. allWordsSlice := make([]string, 0)
  350. for word, _ := range allWordsMap {
  351. allWordsSlice = append(allWordsSlice, word)
  352. }
  353. // assemble vector
  354. srcVector := make([]int, len(allWordsSlice))
  355. dstVector := make([]int, len(allWordsSlice))
  356. for _, word := range srcWords {
  357. if index := BinarySearch(allWordsSlice, word); index != -1 {
  358. srcVector[index] += 1
  359. }
  360. }
  361. for _, word := range dstWords {
  362. if index := BinarySearch(allWordsSlice, word); index != -1 {
  363. dstVector[index] += 1
  364. }
  365. }
  366. // calc cos
  367. numerator := float64(0)
  368. srcSq := 0
  369. dstSq := 0
  370. for i, srcCount := range srcVector {
  371. dstCount := dstVector[i]
  372. numerator += float64(srcCount * dstCount)
  373. srcSq += srcCount * srcCount
  374. dstSq += dstCount * dstCount
  375. }
  376. denominator := math.Sqrt(float64(srcSq * dstSq))
  377. v1 := numerator / denominator
  378. // if v1 > 0.6 {
  379. // log.Println(v1, srcWords1, dstWords1)
  380. // }
  381. return v1
  382. }