// File: init.go

  1. package main
  2. import (
  3. "fmt"
  4. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  5. "jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic"
  6. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  7. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  8. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mysqldb"
  9. "os"
  10. "sync"
  11. "tieta_data/config"
  12. )
  13. var (
  14. MongoTool *mongodb.MongodbSim
  15. MysqlTool *mysqldb.Mysql
  16. Es *elastic.Elastic
  17. findLock sync.Mutex
  18. mapPb, mapPn, mapPc map[string]*Key
  19. AllIdsMapLock sync.Mutex
  20. AllIdsMap map[string]*ID
  21. //仅初始化使用
  22. compareNoPass = map[string]bool{}
  23. compareAB = map[string]bool{}
  24. compareAB2D = map[string]bool{}
  25. compareABD = map[string]bool{}
  26. compareAB2CD = map[string]bool{}
  27. compareABCD = map[string]bool{}
  28. FieldArr = []string{"项目id", "事件id", "业务场景", "事件标题", "招标单位", "中标单位", "省份", "地市", "县(区)", "项目编号", "招标infoId",
  29. "招标单位运营商标签", "招标单位一级行业", "招标单位二级行业", "招标单位联系人", "招标单位联系电话", "招标数据更新时间", "招标信息信息类型",
  30. "预算金额(元)", "招标代理机构", "招标代理机构联系人", "招标代理机构联系电话", "公告地址", "招标剑鱼标讯链接", "中标infoId", "中标单位运营商标签",
  31. "中标单位联系人", "中标单位联系电话", "中标数据更新时间", "中标信息信息类型", "中标金额(元)", "公告地址(URL)", "剑鱼标讯链接"}
  32. FieldMap = map[string]string{
  33. "项目id": "_id",
  34. "事件id": "sourceinfoid",
  35. "业务场景": "scenetag",
  36. "事件标题": "projectname",
  37. "招标单位": "buyer",
  38. "中标单位": "s_winner",
  39. "省份": "area",
  40. "地市": "city",
  41. "县(区)": "district",
  42. "项目编号": "projectcode",
  43. "招标infoId": "zb_infoid",
  44. "招标单位运营商标签": "buyertag",
  45. "招标单位一级行业": "tagname",
  46. "招标单位二级行业": "buyerclass",
  47. "招标单位联系人": "buyerperson",
  48. "招标单位联系电话": "buyertel",
  49. "招标数据更新时间": "zb_updatetime",
  50. "招标信息信息类型": "bidtype",
  51. "预算金额(元)": "budget",
  52. "招标代理机构": "agency",
  53. "招标代理机构联系人": "agencyperson",
  54. "招标代理机构联系电话": "agencytel",
  55. "公告地址": "zb_href",
  56. "招标剑鱼标讯链接": "zb_jybxhref",
  57. "中标infoId": "infoid",
  58. "中标单位运营商标签": "tagname_3",
  59. "中标单位联系人": "winnerperson",
  60. "中标单位联系电话": "winnertel",
  61. "中标数据更新时间": "updatetime",
  62. "中标信息信息类型": "bidstatus",
  63. "中标金额(元)": "bidamount",
  64. "公告地址(URL)": "href",
  65. "剑鱼标讯链接": "jybxhref",
  66. }
  67. FieldArr1 = []string{"信息id", "词包", "项目id", "事件id", "标文关键词", "业务场景", "招标方式", "信息类型", "二级信息类型",
  68. "信息标题", "省", "市", "县", "发布时间", "招标/项目编号", "招标单位", "招标单位行业归类", "招标单位一级标签",
  69. "招标单位二级行业", "招标预算", "招标单位联系人", "招标单位联系方式", "中标单位", "中标单位行业归类", "中标金额",
  70. "中标单位联系人", "中标单位联系方式", "代理机构", "代理联系人", "代理联系电话", "URL", "剑鱼标讯链接", "标书获取时间", "标书截止时间",
  71. "投标开始时间", "投标截止时间", "开标时间", "是否电子招标"}
  72. FieldMap1 = map[string]string{
  73. "信息id": "_id",
  74. "词包": "rulename",
  75. "项目id": "pid",
  76. "事件id": "id",
  77. "标文关键词": "matchkey",
  78. "业务场景": "scenetag",
  79. "招标方式": "subtype",
  80. "信息类型": "toptype",
  81. "二级信息类型": "subtype",
  82. "信息标题": "title",
  83. "省": "area",
  84. "市": "city",
  85. "县": "district",
  86. "发布时间": "publishtime",
  87. "招标/项目编号": "projectcode",
  88. "招标单位": "buyer",
  89. "招标单位行业归类": "buyertag",
  90. "招标单位一级标签": "tagname",
  91. "招标单位二级行业": "buyerclass",
  92. "招标预算": "budget",
  93. "招标单位联系人": "buyerperson",
  94. "招标单位联系方式": "buyertel",
  95. "中标单位": "s_winner",
  96. "中标单位行业归类": "tagname_3",
  97. "中标金额": "bidamount",
  98. "中标单位联系人": "winnerperson",
  99. "中标单位联系方式": "winnertel",
  100. "代理机构": "agency",
  101. "代理联系人": "agencyperson",
  102. "代理联系电话": "agencytel",
  103. "URL": "href",
  104. "剑鱼标讯链接": "jybxhref",
  105. "标书获取时间": "docstarttime",
  106. "标书截止时间": "bidendtime",
  107. "投标开始时间": "bidstarttime",
  108. "投标截止时间": "bidendtime",
  109. "开标时间": "bidopentime",
  110. "是否电子招标": "bidway",
  111. }
  112. )
  113. type ID struct {
  114. Id string
  115. Lock sync.Mutex
  116. P *Project
  117. }
  // Key is the value type of the mapPb, mapPn and mapPc indexes: a list of
  // strings plus a mutex of its own. It embeds a sync.Mutex by value, so it
  // must be handled through a pointer (the indexes store *Key) and never copied.
  type Key struct {
  	// Arr is the list of strings filed under one index key.
  	// NOTE(review): presumably project ids - confirm against the code that appends to it.
  	Arr []string
  	// Lock is this entry's own mutex.
  	// NOTE(review): presumably guards Arr - confirm against the callers.
  	Lock sync.Mutex
  }
  122. func init() {
  123. wg = sync.WaitGroup{}
  124. mapPn = make(map[string]*Key, 5000000)
  125. AllIdsMap = make(map[string]*ID, 5000000)
  126. mapPb = make(map[string]*Key, 1500000)
  127. mapPn = make(map[string]*Key, 5000000)
  128. mapPc = make(map[string]*Key, 5000000)
  129. //加载项目数据
  130. //---不能通过
  131. vm := []string{"C", "D"}
  132. for i := 0; i < 2; i++ {
  133. for j := 0; j < 2; j++ {
  134. for k := 0; k < 2; k++ {
  135. key := vm[i] + vm[j] + vm[k]
  136. compareNoPass[key] = true
  137. //fmt.Println(key)
  138. }
  139. }
  140. }
  141. //fmt.Println("-------------------")
  142. //三个元素一致 [AB][AB][AB],分值最高
  143. vm = []string{"A", "B"}
  144. for i := 0; i < 2; i++ {
  145. for j := 0; j < 2; j++ {
  146. for k := 0; k < 2; k++ {
  147. key := vm[i] + vm[j] + vm[k]
  148. compareAB[key] = true
  149. //fmt.Println(key)
  150. }
  151. }
  152. }
  153. //fmt.Println("-------------------", len(compareAB))
  154. //---至少两个一致,其他可能不存在
  155. //[AB][AB][ABD]
  156. //[AB][ABD][AB]
  157. vm = []string{"A", "B"}
  158. vm2 := []string{"A", "B", "D"}
  159. for i := 0; i < 2; i++ {
  160. for j := 0; j < 2; j++ {
  161. for k := 0; k < 3; k++ {
  162. key := vm[i] + vm[j] + vm2[k]
  163. if !compareAB[key] {
  164. compareAB2D[key] = true
  165. //fmt.Println(key)
  166. }
  167. }
  168. }
  169. }
  170. for i := 0; i < 2; i++ {
  171. for j := 0; j < 3; j++ {
  172. for k := 0; k < 2; k++ {
  173. key := vm[i] + vm2[j] + vm[k]
  174. if !compareAB[key] {
  175. compareAB2D[key] = true
  176. //fmt.Println(key)
  177. }
  178. }
  179. }
  180. }
  181. //fmt.Println("-------------------", len(compareAB2D))
  182. //---至少一个一致,其他可能不存在
  183. //[ABD][ABD][ABD] //已经删除DDD
  184. vm = []string{"A", "B", "D"}
  185. for i := 0; i < 3; i++ {
  186. for j := 0; j < 3; j++ {
  187. for k := 0; k < 3; k++ {
  188. key := vm[i] + vm[j] + vm[k]
  189. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  190. compareABD[key] = true
  191. //fmt.Println(key)
  192. }
  193. }
  194. }
  195. }
  196. //fmt.Println("-------------------", len(compareABD))
  197. //[AB][ABCD][AB]
  198. //[AB][AB][ABCD]
  199. vm = []string{"A", "B"}
  200. vm2 = []string{"A", "B", "C", "D"}
  201. for i := 0; i < 2; i++ {
  202. for j := 0; j < 4; j++ {
  203. for k := 0; k < 2; k++ {
  204. key := vm[i] + vm2[j] + vm[k]
  205. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  206. compareAB2CD[key] = true
  207. //fmt.Println(key)
  208. }
  209. }
  210. }
  211. }
  212. for i := 0; i < 2; i++ {
  213. for j := 0; j < 2; j++ {
  214. for k := 0; k < 4; k++ {
  215. key := vm[i] + vm[j] + vm2[k]
  216. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  217. compareAB2CD[key] = true
  218. //fmt.Println(key)
  219. }
  220. }
  221. }
  222. }
  223. //fmt.Println("-------------------", len(compareAB2CD))
  224. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  225. vm = []string{"A", "B", "C", "D"}
  226. for i := 0; i < 4; i++ {
  227. for j := 0; j < 4; j++ {
  228. for k := 0; k < 4; k++ {
  229. key := vm[i] + vm[j] + vm[k]
  230. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  231. compareABCD[key] = true
  232. //fmt.Println(key)
  233. }
  234. }
  235. }
  236. }
  237. }
  238. func InitEs() {
  239. util.Debug(config.Conf.DB.Es.Addr, config.Conf.DB.Es.Index)
  240. Es = &elastic.Elastic{
  241. S_esurl: config.Conf.DB.Es.Addr,
  242. I_size: config.Conf.DB.Es.Size,
  243. Username: config.Conf.DB.Es.User,
  244. Password: config.Conf.DB.Es.Password,
  245. }
  246. Es.InitElasticSize()
  247. }
  248. func InitMysql() {
  249. dbcfg := config.Conf.DB.Mysql
  250. MysqlTool = &mysqldb.Mysql{
  251. Address: dbcfg.Addr,
  252. DBName: dbcfg.Dbname,
  253. UserName: dbcfg.User,
  254. PassWord: dbcfg.Password,
  255. }
  256. MysqlTool.Init()
  257. }
  258. func InitLog() {
  259. logcfg := config.Conf.Log
  260. err := log.InitLog(
  261. log.Path(logcfg.LogPath),
  262. log.Level(logcfg.LogLevel),
  263. log.Compress(logcfg.Compress),
  264. log.MaxSize(logcfg.MaxSize),
  265. log.MaxBackups(logcfg.MaxBackups),
  266. log.MaxAge(logcfg.MaxAge),
  267. log.Format(logcfg.Format),
  268. )
  269. if err != nil {
  270. fmt.Printf("InitLog failed: %v\n", err)
  271. os.Exit(1)
  272. }
  273. }
  // Project is the merged record of one project: its identifying fields, the
  // contacts of buyer, agency and winner, and the ids, times and links of the
  // tender notice (Zb* fields) and of the award notice.
  //
  // Field order and JSON tags are part of the serialized form and are kept
  // exactly as they were. The unexported fields are never serialized.
  //
  // NOTE(review): several tags disagree with names used elsewhere in this file
  // and should be confirmed: "msg_Id" here against "msg_id" on Info,
  // "multipackageid" here against "multipackage_id" on Info, and "tagname3"
  // here against the "tagname_3" value used in FieldMap and FieldMap1.
  type Project struct {
  	MsgId       string  `json:"msg_Id"`
  	ProjectId   string  `json:"projectId"`
  	FirstTime   int64   `json:"firsttime,omitempty"`   // earliest time of the project
  	LastTime    int64   `json:"lasttime,omitempty"`    // latest time of the project
  	ProjectName string  `json:"projectname,omitempty"` // project name
  	ProjectCode string  `json:"projectcode,omitempty"` // project code
  	Buyer       string  `json:"buyer,omitempty"`       // purchasing entity
  	Agency      string  `json:"agency"`                // agency
  	Winners     string  `json:"s_winner,omitempty"`    // winning bidder
  	Area        string  `json:"area"`                  // region
  	City        string  `json:"city"`                  // city
  	District    string  `json:"district"`              // district / county
  	Bidamount   float64 `json:"bidamount,omitempty"`   // winning bid amount
  	Budget      float64 `json:"budget,omitempty"`      // budget

  	// Unexported working fields; their use is not visible in this part of the file.
  	score  int
  	comStr string
  	resVal int
  	pjVal  int

  	Topscopeclass  []string `json:"topscopeclass,omitempty"`
  	Subscopeclass  []string `json:"subscopeclass,omitempty"` // sub-industry classification
  	Buyerperson    string   `json:"buyerperson"`             // buyer contact person
  	Buyertel       string   `json:"buyertel"`                // buyer contact phone
  	AgencyPerson   string   `json:"agencyperson"`
  	AgencyTel      string   `json:"agencytel"`
  	WinnerPerson   string   `json:"winnerperson"`
  	WinnerTel      string   `json:"winnertel"`
  	Buyerclass     string   `json:"buyerclass"`            // buyer classification
  	Bidopentime    int64    `json:"bidopentime,omitempty"` // bid opening time
  	Jgtime         int64    `json:"jgtime"`                // award (result) time
  	Zbtime         int64    `json:"zbtime"`                // tender time
  	MultiPackage   int      `json:"multipackage"`          // multi-package flag
  	MultiPackageId string   `json:"multipackageid"`

  	ZbInfoId string `json:"zb_infoid"` // unique id of the tender notice
  	// ZbUpdateTime is the tender notice's update time (FieldMap labels
  	// "zb_updatetime" as the tender-data update time). The original comment
  	// here described a notice type; it had slipped down from the next field.
  	ZbUpdateTime int64  `json:"zb_updatetime"`
  	ZbTopType    string `json:"zb_toptype"` // top-level type of the tender notice
  	ZbHref       string `json:"zb_href"`
  	ZbJybxHref   string `json:"zb_jybxhref"`

  	InfoId string `json:"infoid"` // unique id of the award notice
  	// UpdateTime is documented in the original as the award notice's
  	// publish time; FieldMap labels "updatetime" as the award-data update time.
  	// NOTE(review): this is a string while ZbUpdateTime is an int64 - confirm.
  	UpdateTime string `json:"updatetime"`
  	TopType    string `json:"toptype"` // top-level type of the award notice
  	SubType    string `json:"subtype"` // second-level type of the award notice
  	Href       string `json:"href"`
  	JybxHref   string `json:"jybxhref"`

  	SceneTag string `json:"scenetag"` // business scenario
  	BuyerTag string `json:"buyertag"` // industry category of the tendering entity
  	TagName  string `json:"tagname"`  // first-level tag of the tendering entity
  	TagName2 string `json:"tagname2"` // second-level industry of the tendering entity
  	TagName3 string `json:"tagname3"` // industry category of the winning entity
  }
  // Info is one notice record: its ids, tags, links, times, title and types,
  // the project, buyer, winner and agency fields, the location, the amounts and
  // the multi-package markers, followed by derived working fields.
  //
  // Field order and JSON tags are part of the serialized form and are kept
  // exactly as they were. The fields from Winners onward carry no tag, so the
  // exported ones serialize under their Go names and pnbval is not serialized.
  type Info struct {
  	MsgId          int64   `json:"msg_id"`
  	Id             string  `json:"id"`
  	RuleName       string  `json:"rulename"`
  	TagName        string  `json:"tagname"`
  	TagName2       string  `json:"tagname2"`
  	TagName3       string  `json:"tagname3"`
  	Href           string  `json:"href"` // source URL
  	JybxHref       string  `json:"jybxhref"`
  	Publishtime    int64   `json:"publishtime"`
  	Comeintime     int64   `json:"comeintime"`
  	Title          string  `json:"title"`
  	TopType        string  `json:"toptype"`
  	SubType        string  `json:"subtype"`
  	ProjectName    string  `json:"projectname"`
  	ProjectCode    string  `json:"projectcode"`
  	Buyer          string  `json:"buyer"`
  	Buyerperson    string  `json:"buyerperson"`
  	Buyertel       string  `json:"buyertel"`
  	WinnerTel      string  `json:"winnertel"`
  	WinnerPerson   string  `json:"winnerperson"`
  	Agency         string  `json:"agency"`
  	AgencyPerson   string  `json:"agencyperson"`
  	AgencyTel      string  `json:"agencytel"`
  	Area           string  `json:"area"`
  	City           string  `json:"city"`
  	District       string  `json:"district"`
  	Buyerclass     string  `json:"buyerclass"`
  	Budget         float64 `json:"budget"`
  	Bidamount      float64 `json:"bidamount"`
  	MultiPackage   int     `json:"multipackage"`
  	MultiPackageId string  `json:"multipackage_id"`
  	Nid            string  `json:"id_new"`

  	Winners []string
  	pnbval  int // how many of project name, project code and buyer are present
  	LenPC   int // length of the project code
  	LenPN   int // length of the project name
  	LenPTC  int // length of the project code extracted from the title
  }