// Source file: init.go

  1. package main
  2. import (
  3. "fmt"
  4. "go.mongodb.org/mongo-driver/bson/primitive"
  5. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  6. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  7. "os"
  8. "sync"
  9. "tieta_data/config"
  10. )
  11. var (
  12. MongoTool *mongodb.MongodbSim
  13. findLock sync.Mutex
  14. mapPb, mapPn, mapPc map[string]*Key
  15. AllIdsMapLock sync.Mutex
  16. AllIdsMap map[string]*ID
  17. //仅初始化使用
  18. compareNoPass = map[string]bool{}
  19. compareAB = map[string]bool{}
  20. compareAB2D = map[string]bool{}
  21. compareABD = map[string]bool{}
  22. compareAB2CD = map[string]bool{}
  23. compareABCD = map[string]bool{}
  24. FieldArr = []string{"项目id", "事件id", "业务场景", "事件标题", "招标单位", "中标单位", "省份", "地市", "县(区)", "项目编号", "招标infoId",
  25. "招标单位运营商标签", "招标单位一级行业", "招标单位二级行业", "招标单位联系人", "招标单位联系电话", "招标数据更新时间", "招标信息信息类型",
  26. "预算金额(元)", "招标代理机构", "招标代理机构联系人", "招标代理机构联系电话", "公告地址", "招标剑鱼标讯链接", "中标infoId", "中标单位运营商标签",
  27. "中标单位联系人", "中标单位联系电话", "中标数据更新时间", "中标信息信息类型", "中标金额(元)", "公告地址(URL)", "剑鱼标讯链接"}
  28. FieldMap = map[string]string{
  29. "项目id": "_id",
  30. "事件id": "sourceinfoid",
  31. "业务场景": "scenetag",
  32. "事件标题": "projectname",
  33. "招标单位": "buyer",
  34. "中标单位": "s_winner",
  35. "省份": "area",
  36. "地市": "city",
  37. "县(区)": "district",
  38. "项目编号": "projectcode",
  39. "招标infoId": "zb_infoid",
  40. "招标单位运营商标签": "buyertag",
  41. "招标单位一级行业": "tagname",
  42. "招标单位二级行业": "buyerclass",
  43. "招标单位联系人": "buyerperson",
  44. "招标单位联系电话": "buyertel",
  45. "招标数据更新时间": "zb_updatetime",
  46. "招标信息信息类型": "bidtype",
  47. "预算金额(元)": "budget",
  48. "招标代理机构": "agency",
  49. "招标代理机构联系人": "agencyperson",
  50. "招标代理机构联系电话": "agencytel",
  51. "公告地址": "zb_href",
  52. "招标剑鱼标讯链接": "zb_jybxhref",
  53. "中标infoId": "infoid",
  54. "中标单位运营商标签": "tagname_3",
  55. "中标单位联系人": "winnerperson",
  56. "中标单位联系电话": "winnertel",
  57. "中标数据更新时间": "updatetime",
  58. "中标信息信息类型": "bidstatus",
  59. "中标金额(元)": "bidamount",
  60. "公告地址(URL)": "href",
  61. "剑鱼标讯链接": "jybxhref",
  62. }
  63. FieldArr1 = []string{"信息id", "词包", "项目id", "事件id", "标文关键词", "业务场景", "招标方式", "信息类型", "二级信息类型",
  64. "信息标题", "省", "市", "县", "发布时间", "招标/项目编号", "招标单位", "招标单位行业归类", "招标单位一级标签",
  65. "招标单位二级行业", "招标预算", "招标单位联系人", "招标单位联系方式", "中标单位", "中标单位行业归类", "中标金额",
  66. "中标单位联系人", "中标单位联系方式", "代理机构", "代理联系人", "代理联系电话", "URL", "剑鱼标讯链接", "标书获取时间", "标书截止时间",
  67. "投标开始时间", "投标截止时间", "开标时间", "是否电子招标"}
  68. FieldMap1 = map[string]string{
  69. "信息id": "_id",
  70. "词包": "rulename",
  71. "项目id": "pid",
  72. "事件id": "id",
  73. "标文关键词": "matchkey",
  74. "业务场景": "scenetag",
  75. "招标方式": "subtype",
  76. "信息类型": "toptype",
  77. "二级信息类型": "subtype",
  78. "信息标题": "title",
  79. "省": "area",
  80. "市": "city",
  81. "县": "district",
  82. "发布时间": "publishtime",
  83. "招标/项目编号": "projectcode",
  84. "招标单位": "buyer",
  85. "招标单位行业归类": "buyertag",
  86. "招标单位一级标签": "tagname",
  87. "招标单位二级行业": "buyerclass",
  88. "招标预算": "budget",
  89. "招标单位联系人": "buyerperson",
  90. "招标单位联系方式": "buyertel",
  91. "中标单位": "s_winner",
  92. "中标单位行业归类": "tagname_3",
  93. "中标金额": "bidamount",
  94. "中标单位联系人": "winnerperson",
  95. "中标单位联系方式": "winnertel",
  96. "代理机构": "agency",
  97. "代理联系人": "agencyperson",
  98. "代理联系电话": "agencytel",
  99. "URL": "href",
  100. "剑鱼标讯链接": "jybxhref",
  101. "标书获取时间": "docstarttime",
  102. "标书截止时间": "bidendtime",
  103. "投标开始时间": "bidstarttime",
  104. "投标截止时间": "bidendtime",
  105. "开标时间": "bidopentime",
  106. "是否电子招标": "bidway",
  107. }
  108. )
  109. type ID struct {
  110. Id string
  111. Lock sync.Mutex
  112. P *Project
  113. }
  114. type Key struct {
  115. Arr []string
  116. Lock sync.Mutex
  117. }
  118. func init() {
  119. wg = sync.WaitGroup{}
  120. mapPn = make(map[string]*Key, 5000000)
  121. AllIdsMap = make(map[string]*ID, 5000000)
  122. mapPb = make(map[string]*Key, 1500000)
  123. mapPn = make(map[string]*Key, 5000000)
  124. mapPc = make(map[string]*Key, 5000000)
  125. //加载项目数据
  126. //---不能通过
  127. vm := []string{"C", "D"}
  128. for i := 0; i < 2; i++ {
  129. for j := 0; j < 2; j++ {
  130. for k := 0; k < 2; k++ {
  131. key := vm[i] + vm[j] + vm[k]
  132. compareNoPass[key] = true
  133. //fmt.Println(key)
  134. }
  135. }
  136. }
  137. //fmt.Println("-------------------")
  138. //三个元素一致 [AB][AB][AB],分值最高
  139. vm = []string{"A", "B"}
  140. for i := 0; i < 2; i++ {
  141. for j := 0; j < 2; j++ {
  142. for k := 0; k < 2; k++ {
  143. key := vm[i] + vm[j] + vm[k]
  144. compareAB[key] = true
  145. //fmt.Println(key)
  146. }
  147. }
  148. }
  149. //fmt.Println("-------------------", len(compareAB))
  150. //---至少两个一致,其他可能不存在
  151. //[AB][AB][ABD]
  152. //[AB][ABD][AB]
  153. vm = []string{"A", "B"}
  154. vm2 := []string{"A", "B", "D"}
  155. for i := 0; i < 2; i++ {
  156. for j := 0; j < 2; j++ {
  157. for k := 0; k < 3; k++ {
  158. key := vm[i] + vm[j] + vm2[k]
  159. if !compareAB[key] {
  160. compareAB2D[key] = true
  161. //fmt.Println(key)
  162. }
  163. }
  164. }
  165. }
  166. for i := 0; i < 2; i++ {
  167. for j := 0; j < 3; j++ {
  168. for k := 0; k < 2; k++ {
  169. key := vm[i] + vm2[j] + vm[k]
  170. if !compareAB[key] {
  171. compareAB2D[key] = true
  172. //fmt.Println(key)
  173. }
  174. }
  175. }
  176. }
  177. //fmt.Println("-------------------", len(compareAB2D))
  178. //---至少一个一致,其他可能不存在
  179. //[ABD][ABD][ABD] //已经删除DDD
  180. vm = []string{"A", "B", "D"}
  181. for i := 0; i < 3; i++ {
  182. for j := 0; j < 3; j++ {
  183. for k := 0; k < 3; k++ {
  184. key := vm[i] + vm[j] + vm[k]
  185. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  186. compareABD[key] = true
  187. //fmt.Println(key)
  188. }
  189. }
  190. }
  191. }
  192. //fmt.Println("-------------------", len(compareABD))
  193. //[AB][ABCD][AB]
  194. //[AB][AB][ABCD]
  195. vm = []string{"A", "B"}
  196. vm2 = []string{"A", "B", "C", "D"}
  197. for i := 0; i < 2; i++ {
  198. for j := 0; j < 4; j++ {
  199. for k := 0; k < 2; k++ {
  200. key := vm[i] + vm2[j] + vm[k]
  201. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  202. compareAB2CD[key] = true
  203. //fmt.Println(key)
  204. }
  205. }
  206. }
  207. }
  208. for i := 0; i < 2; i++ {
  209. for j := 0; j < 2; j++ {
  210. for k := 0; k < 4; k++ {
  211. key := vm[i] + vm[j] + vm2[k]
  212. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  213. compareAB2CD[key] = true
  214. //fmt.Println(key)
  215. }
  216. }
  217. }
  218. }
  219. //fmt.Println("-------------------", len(compareAB2CD))
  220. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  221. vm = []string{"A", "B", "C", "D"}
  222. for i := 0; i < 4; i++ {
  223. for j := 0; j < 4; j++ {
  224. for k := 0; k < 4; k++ {
  225. key := vm[i] + vm[j] + vm[k]
  226. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  227. compareABCD[key] = true
  228. //fmt.Println(key)
  229. }
  230. }
  231. }
  232. }
  233. }
  234. func InitMgo() {
  235. MongoTool = &mongodb.MongodbSim{
  236. MongodbAddr: config.Conf.DB.Mongo.Addr,
  237. Size: config.Conf.DB.Mongo.Size,
  238. DbName: config.Conf.DB.Mongo.Dbname,
  239. UserName: config.Conf.DB.Mongo.User,
  240. Password: config.Conf.DB.Mongo.Password,
  241. }
  242. MongoTool.InitPool()
  243. }
  244. func InitLog() {
  245. logcfg := config.Conf.Log
  246. err := log.InitLog(
  247. log.Path(logcfg.LogPath),
  248. log.Level(logcfg.LogLevel),
  249. log.Compress(logcfg.Compress),
  250. log.MaxSize(logcfg.MaxSize),
  251. log.MaxBackups(logcfg.MaxBackups),
  252. log.MaxAge(logcfg.MaxAge),
  253. log.Format(logcfg.Format),
  254. )
  255. if err != nil {
  256. fmt.Printf("InitLog failed: %v\n", err)
  257. os.Exit(1)
  258. }
  259. }
  260. type Project struct {
  261. Id primitive.ObjectID `json:"_id"`
  262. Ids []string `json:"ids,omitempty"`
  263. FirstTime int64 `json:"firsttime,omitempty"` //项目的最早时间
  264. LastTime int64 `json:"lasttime,omitempty"` //项目的最后时间
  265. ProjectName string `json:"projectname,omitempty"` //项目名称
  266. ProjectCode string `json:"projectcode,omitempty"` //项目代码
  267. Buyer string `json:"buyer,omitempty"` //采购单位
  268. Agency string `json:"agency"` //代理机构
  269. Area string `json:"area"` //地区
  270. City string `json:"city"` //地市
  271. District string `json:"district"` //区县
  272. Bidamount float64 `json:"bidamount,omitempty"` //中标金额
  273. Budget float64 `json:"budget,omitempty"` //预算
  274. score int
  275. comStr string
  276. MPN []string `json:"mpn,omitempty"` //合并后多余的项目名称
  277. MPC []string `json:"mpc,omitempty"` //合并后多余的项目编号
  278. resVal, pjVal int
  279. Topscopeclass []string `json:"topscopeclass,omitempty"`
  280. Subscopeclass []string `json:"subscopeclass,omitempty"` //子行业分类
  281. Winners string `json:"s_winner,omitempty"` //中标人
  282. Buyerperson string `json:"buyerperson"` //采购联系人
  283. Buyertel string `json:"buyertel"` //采购联系人电话
  284. AgencyPerson string `json:"agencyperson"`
  285. AgencyTel string `json:"agencytel"`
  286. WinnerPerson string `json:"winnerperson"`
  287. WinnerTel string `json:"winnertel"`
  288. Buyerclass string `json:"buyerclass"` //采购单位分类
  289. Bidopentime int64 `json:"bidopentime,omitempty"` //开标时间
  290. Jgtime int64 `json:"jgtime"` //结果中标时间
  291. Zbtime int64 `json:"zbtime"` //招标时间
  292. SceneTag string `json:"scenetag"` //业务场景
  293. BuyerTag string `json:"buyertag"` //招标单位行业归类
  294. TagName string `json:"tagname"` //招标单位一级标签
  295. TagName2 string `json:"tagname_2"` //招标单位二级行业
  296. TagName3 string `json:"tagname_3"` //中标单位行业归类
  297. }
  298. type Info struct {
  299. Id string `json:"id"`
  300. Href string `json:"href"` //源地址
  301. Publishtime int64 `json:"publishtime"`
  302. Comeintime int64 `json:"comeintime"`
  303. Title string `json:"title"`
  304. TopType string `json:"toptype"`
  305. SubType string `json:"subtype"`
  306. ProjectName string `json:"projectname"`
  307. ProjectCode string `json:"projectcode"`
  308. Buyer string `json:"buyer"`
  309. Buyerperson string `json:"buyerperson"`
  310. Buyertel string `json:"buyertel"`
  311. Agency string `json:"agency"`
  312. Area string `json:"area"`
  313. City string `json:"city"`
  314. District string `json:"district"`
  315. Topscopeclass []string `json:"topscopeclass"`
  316. Subscopeclass []string `json:"subscopeclass"`
  317. Buyerclass string `json:"buyerclass"`
  318. Bidopentime int64 `json:"bidopentime"`
  319. Budget float64 `json:"budget"`
  320. Bidamount float64 `json:"bidamount"`
  321. Winners []string
  322. PTC string //从标题中抽的项目编号
  323. pnbval int //项目名称、编号、采购单位存在的个数
  324. LenPC int //项目编号长度
  325. LenPN int //项目名称长度
  326. LenPTC int //标题抽的项目编号长度
  327. //以下三个元素做对比,计算包含时候使用
  328. PNBH int //0初始,+包含,-被包含
  329. PCBH int
  330. PTCBH int
  331. }