init.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. package main
  2. import (
  3. "fmt"
  4. util "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  5. "jygit.jydev.jianyu360.cn/data_processing/common_utils/elastic"
  6. "jygit.jydev.jianyu360.cn/data_processing/common_utils/log"
  7. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mongodb"
  8. "jygit.jydev.jianyu360.cn/data_processing/common_utils/mysqldb"
  9. "os"
  10. "sync"
  11. "tieta_data/config"
  12. )
  13. var (
  14. MongoTool *mongodb.MongodbSim
  15. MysqlTool *mysqldb.Mysql
  16. Es *elastic.Elastic
  17. findLock sync.Mutex
  18. mapPb, mapPn, mapPc map[string]*Key
  19. AllIdsMapLock sync.Mutex
  20. AllIdsMap map[string]*ID
  21. IsProject bool
  22. //仅初始化使用
  23. compareNoPass = map[string]bool{}
  24. compareAB = map[string]bool{}
  25. compareAB2D = map[string]bool{}
  26. compareABD = map[string]bool{}
  27. compareAB2CD = map[string]bool{}
  28. compareABCD = map[string]bool{}
  29. FieldArr = []string{"项目id", "事件id", "业务场景", "事件标题", "招标单位", "中标单位", "省份", "地市", "县(区)", "项目编号", "招标infoId",
  30. "招标单位运营商标签", "招标单位一级行业", "招标单位二级行业", "招标单位联系人", "招标单位联系电话", "招标数据更新时间", "招标信息信息类型",
  31. "预算金额(元)", "招标代理机构", "招标代理机构联系人", "招标代理机构联系电话", "公告地址", "招标剑鱼标讯链接", "中标infoId", "中标单位运营商标签",
  32. "中标单位联系人", "中标单位联系电话", "中标数据更新时间", "中标信息信息类型", "中标金额(元)", "公告地址(URL)", "剑鱼标讯链接"}
  33. FieldMap = map[string]string{
  34. "项目id": "_id",
  35. "事件id": "sourceinfoid",
  36. "业务场景": "scenetag",
  37. "事件标题": "projectname",
  38. "招标单位": "buyer",
  39. "中标单位": "s_winner",
  40. "省份": "area",
  41. "地市": "city",
  42. "县(区)": "district",
  43. "项目编号": "projectcode",
  44. "招标infoId": "zb_infoid",
  45. "招标单位运营商标签": "buyertag",
  46. "招标单位一级行业": "tagname",
  47. "招标单位二级行业": "buyerclass",
  48. "招标单位联系人": "buyerperson",
  49. "招标单位联系电话": "buyertel",
  50. "招标数据更新时间": "zb_updatetime",
  51. "招标信息信息类型": "bidtype",
  52. "预算金额(元)": "budget",
  53. "招标代理机构": "agency",
  54. "招标代理机构联系人": "agencyperson",
  55. "招标代理机构联系电话": "agencytel",
  56. "公告地址": "zb_href",
  57. "招标剑鱼标讯链接": "zb_jybxhref",
  58. "中标infoId": "infoid",
  59. "中标单位运营商标签": "tagname_3",
  60. "中标单位联系人": "winnerperson",
  61. "中标单位联系电话": "winnertel",
  62. "中标数据更新时间": "updatetime",
  63. "中标信息信息类型": "bidstatus",
  64. "中标金额(元)": "bidamount",
  65. "公告地址(URL)": "href",
  66. "剑鱼标讯链接": "jybxhref",
  67. }
  68. FieldArr1 = []string{"信息id", "词包", "项目id", "事件id", "标文关键词", "业务场景", "招标方式", "信息类型", "二级信息类型",
  69. "信息标题", "省", "市", "县", "发布时间", "招标/项目编号", "招标单位", "招标单位行业归类", "招标单位一级标签",
  70. "招标单位二级行业", "招标预算", "招标单位联系人", "招标单位联系方式", "中标单位", "中标单位行业归类", "中标金额",
  71. "中标单位联系人", "中标单位联系方式", "代理机构", "代理联系人", "代理联系电话", "URL", "剑鱼标讯链接", "标书获取时间", "标书截止时间",
  72. "投标开始时间", "投标截止时间", "开标时间", "是否电子招标"}
  73. FieldMap1 = map[string]string{
  74. "信息id": "_id",
  75. "词包": "rulename",
  76. "项目id": "pid",
  77. "事件id": "id",
  78. "标文关键词": "matchkey",
  79. "业务场景": "scenetag",
  80. "招标方式": "subtype",
  81. "信息类型": "toptype",
  82. "二级信息类型": "subtype",
  83. "信息标题": "title",
  84. "省": "area",
  85. "市": "city",
  86. "县": "district",
  87. "发布时间": "publishtime",
  88. "招标/项目编号": "projectcode",
  89. "招标单位": "buyer",
  90. "招标单位行业归类": "buyertag",
  91. "招标单位一级标签": "tagname",
  92. "招标单位二级行业": "buyerclass",
  93. "招标预算": "budget",
  94. "招标单位联系人": "buyerperson",
  95. "招标单位联系方式": "buyertel",
  96. "中标单位": "s_winner",
  97. "中标单位行业归类": "tagname_3",
  98. "中标金额": "bidamount",
  99. "中标单位联系人": "winnerperson",
  100. "中标单位联系方式": "winnertel",
  101. "代理机构": "agency",
  102. "代理联系人": "agencyperson",
  103. "代理联系电话": "agencytel",
  104. "URL": "href",
  105. "剑鱼标讯链接": "jybxhref",
  106. "标书获取时间": "docstarttime",
  107. "标书截止时间": "bidendtime",
  108. "投标开始时间": "bidstarttime",
  109. "投标截止时间": "bidendtime",
  110. "开标时间": "bidopentime",
  111. "是否电子招标": "bidway",
  112. }
  113. )
  114. type ID struct {
  115. Id string
  116. Lock sync.Mutex
  117. P *Project
  118. }
  119. type Key struct {
  120. Arr []string
  121. Lock sync.Mutex
  122. }
  123. func init() {
  124. wg = sync.WaitGroup{}
  125. mapPn = make(map[string]*Key, 5000000)
  126. AllIdsMap = make(map[string]*ID, 5000000)
  127. mapPb = make(map[string]*Key, 1500000)
  128. mapPn = make(map[string]*Key, 5000000)
  129. mapPc = make(map[string]*Key, 5000000)
  130. //加载项目数据
  131. //---不能通过
  132. vm := []string{"C", "D"}
  133. for i := 0; i < 2; i++ {
  134. for j := 0; j < 2; j++ {
  135. for k := 0; k < 2; k++ {
  136. key := vm[i] + vm[j] + vm[k]
  137. compareNoPass[key] = true
  138. //fmt.Println(key)
  139. }
  140. }
  141. }
  142. //fmt.Println("-------------------")
  143. //三个元素一致 [AB][AB][AB],分值最高
  144. vm = []string{"A", "B"}
  145. for i := 0; i < 2; i++ {
  146. for j := 0; j < 2; j++ {
  147. for k := 0; k < 2; k++ {
  148. key := vm[i] + vm[j] + vm[k]
  149. compareAB[key] = true
  150. //fmt.Println(key)
  151. }
  152. }
  153. }
  154. //fmt.Println("-------------------", len(compareAB))
  155. //---至少两个一致,其他可能不存在
  156. //[AB][AB][ABD]
  157. //[AB][ABD][AB]
  158. vm = []string{"A", "B"}
  159. vm2 := []string{"A", "B", "D"}
  160. for i := 0; i < 2; i++ {
  161. for j := 0; j < 2; j++ {
  162. for k := 0; k < 3; k++ {
  163. key := vm[i] + vm[j] + vm2[k]
  164. if !compareAB[key] {
  165. compareAB2D[key] = true
  166. //fmt.Println(key)
  167. }
  168. }
  169. }
  170. }
  171. for i := 0; i < 2; i++ {
  172. for j := 0; j < 3; j++ {
  173. for k := 0; k < 2; k++ {
  174. key := vm[i] + vm2[j] + vm[k]
  175. if !compareAB[key] {
  176. compareAB2D[key] = true
  177. //fmt.Println(key)
  178. }
  179. }
  180. }
  181. }
  182. //fmt.Println("-------------------", len(compareAB2D))
  183. //---至少一个一致,其他可能不存在
  184. //[ABD][ABD][ABD] //已经删除DDD
  185. vm = []string{"A", "B", "D"}
  186. for i := 0; i < 3; i++ {
  187. for j := 0; j < 3; j++ {
  188. for k := 0; k < 3; k++ {
  189. key := vm[i] + vm[j] + vm[k]
  190. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] {
  191. compareABD[key] = true
  192. //fmt.Println(key)
  193. }
  194. }
  195. }
  196. }
  197. //fmt.Println("-------------------", len(compareABD))
  198. //[AB][ABCD][AB]
  199. //[AB][AB][ABCD]
  200. vm = []string{"A", "B"}
  201. vm2 = []string{"A", "B", "C", "D"}
  202. for i := 0; i < 2; i++ {
  203. for j := 0; j < 4; j++ {
  204. for k := 0; k < 2; k++ {
  205. key := vm[i] + vm2[j] + vm[k]
  206. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  207. compareAB2CD[key] = true
  208. //fmt.Println(key)
  209. }
  210. }
  211. }
  212. }
  213. for i := 0; i < 2; i++ {
  214. for j := 0; j < 2; j++ {
  215. for k := 0; k < 4; k++ {
  216. key := vm[i] + vm[j] + vm2[k]
  217. if !compareAB[key] && !compareAB2D[key] && !compareNoPass[key] && !compareABD[key] {
  218. compareAB2CD[key] = true
  219. //fmt.Println(key)
  220. }
  221. }
  222. }
  223. }
  224. //fmt.Println("-------------------", len(compareAB2CD))
  225. //[ABECD][ABECD][ABECD] //已经删除[CD][CD][CD] //这个要重点讨论
  226. vm = []string{"A", "B", "C", "D"}
  227. for i := 0; i < 4; i++ {
  228. for j := 0; j < 4; j++ {
  229. for k := 0; k < 4; k++ {
  230. key := vm[i] + vm[j] + vm[k]
  231. if !compareAB[key] && !compareAB2D[key] && !compareABD[key] && !compareNoPass[key] && !compareAB2CD[key] {
  232. compareABCD[key] = true
  233. //fmt.Println(key)
  234. }
  235. }
  236. }
  237. }
  238. }
  239. func InitEs() {
  240. util.Debug(config.Conf.DB.Es.Addr, config.Conf.DB.Es.Index)
  241. Es = &elastic.Elastic{
  242. S_esurl: config.Conf.DB.Es.Addr,
  243. I_size: config.Conf.DB.Es.Size,
  244. Username: config.Conf.DB.Es.User,
  245. Password: config.Conf.DB.Es.Password,
  246. }
  247. Es.InitElasticSize()
  248. }
  249. func InitMysql() {
  250. dbcfg := config.Conf.DB.Mysql
  251. MysqlTool = &mysqldb.Mysql{
  252. Address: dbcfg.Addr,
  253. DBName: dbcfg.Dbname,
  254. UserName: dbcfg.User,
  255. PassWord: dbcfg.Password,
  256. }
  257. MysqlTool.Init()
  258. }
  259. func InitLog() {
  260. logcfg := config.Conf.Log
  261. err := log.InitLog(
  262. log.Path(logcfg.LogPath),
  263. log.Level(logcfg.LogLevel),
  264. log.Compress(logcfg.Compress),
  265. log.MaxSize(logcfg.MaxSize),
  266. log.MaxBackups(logcfg.MaxBackups),
  267. log.MaxAge(logcfg.MaxAge),
  268. log.Format(logcfg.Format),
  269. )
  270. if err != nil {
  271. fmt.Printf("InitLog failed: %v\n", err)
  272. os.Exit(1)
  273. }
  274. }
  // Project is the JSON-serialisable record of a project.
  //
  // Field order and JSON tags determine the encoded output and must not be
  // changed. The unexported fields are ignored by encoding/json.
  type Project struct {
  	MsgId string `json:"msg_Id"`
  	ProjectId string `json:"projectId"`
  	FirstTime int64 `json:"firsttime,omitempty"` // earliest time of the project
  	LastTime int64 `json:"lasttime,omitempty"` // latest time of the project
  	ProjectName string `json:"projectname,omitempty"` // project name
  	ProjectCode string `json:"projectcode,omitempty"` // project code
  	Buyer string `json:"buyer,omitempty"` // purchasing unit
  	Agency string `json:"agency"` // agency
  	Winners string `json:"s_winner,omitempty"` // winning bidder
  	Area string `json:"area"` // region
  	City string `json:"city"` // city
  	District string `json:"district"` // district / county
  	Bidamount float64 `json:"bidamount,omitempty"` // winning bid amount
  	Budget float64 `json:"budget,omitempty"` // budget

  	// Internal working values; their use is not visible in this part of
  	// the file.
  	score int
  	comStr string
  	resVal int
  	pjVal int

  	Topscopeclass []string `json:"topscopeclass,omitempty"`
  	Subscopeclass []string `json:"subscopeclass,omitempty"` // sub-industry classification
  	Buyerperson string `json:"buyerperson"` // buyer contact person
  	Buyertel string `json:"buyertel"` // buyer contact phone
  	AgencyPerson string `json:"agencyperson"`
  	AgencyTel string `json:"agencytel"`
  	WinnerPerson string `json:"winnerperson"`
  	WinnerTel string `json:"winnertel"`
  	Buyerclass string `json:"buyerclass"` // buyer classification
  	Bidopentime int64 `json:"bidopentime,omitempty"` // bid opening time
  	Jgtime int64 `json:"jgtime"` // result (award) time
  	Zbtime int64 `json:"zbtime"` // tender time
  	MultiPackage int `json:"multipackage"` // multi-package flag
  	MultiPackageId string `json:"multipackageid"`

  	// Tender-notice fields.
  	// NOTE(review): the original trailing comments on ZbUpdateTime and
  	// ZbTopType read "first-level notice type" and "second-level notice
  	// type", which does not match the field names; they look shifted by
  	// one field - confirm.
  	ZbInfoId string `json:"zb_infoid"` // unique id of the tender notice
  	ZbUpdateTime int64 `json:"zb_updatetime"`
  	ZbTopType string `json:"zb_toptype"`
  	ZbHref string `json:"zb_href"`
  	ZbJybxHref string `json:"zb_jybxhref"`

  	// Award-notice fields.
  	InfoId string `json:"infoid"` // unique id of the award notice
  	UpdateTime string `json:"updatetime"` // award notice publish time
  	TopType string `json:"toptype"` // award notice first-level type
  	SubType string `json:"subtype"` // award notice second-level type
  	Href string `json:"href"`
  	JybxHref string `json:"jybxhref"`

  	SceneTag string `json:"scenetag"` // business scenario
  	BuyerTag string `json:"buyertag"` // buyer industry category
  	TagName string `json:"tagname"` // buyer first-level tag
  	TagName2 string `json:"tagname2"` // buyer second-level industry
  	TagName3 string `json:"tagname3"` // winner industry category
  }
  // Info is the JSON-serialisable record of a single notice.
  //
  // Field order and JSON tags determine the encoded output and must not be
  // changed. Untagged exported fields are encoded under their Go names; the
  // unexported pnbval is ignored by encoding/json.
  type Info struct {
  	MsgId int64 `json:"msg_id"`
  	Id string `json:"id"`
  	RuleName string `json:"rulename"`
  	TagName string `json:"tagname"`
  	TagName2 string `json:"tagname2"`
  	TagName3 string `json:"tagname3"`
  	Href string `json:"href"` // source address
  	JybxHref string `json:"jybxhref"`
  	Publishtime int64 `json:"publishtime"`
  	Comeintime int64 `json:"comeintime"`
  	Title string `json:"title"`
  	TopType string `json:"toptype"`
  	SubType string `json:"subtype"`
  	ProjectName string `json:"projectname"`
  	ProjectCode string `json:"projectcode"`

  	Buyer string `json:"buyer"`
  	Buyerperson string `json:"buyerperson"`
  	Buyertel string `json:"buyertel"`
  	WinnerTel string `json:"winnertel"`
  	WinnerPerson string `json:"winnerperson"`
  	Agency string `json:"agency"`
  	AgencyPerson string `json:"agencyperson"`
  	AgencyTel string `json:"agencytel"`

  	Area string `json:"area"`
  	City string `json:"city"`
  	District string `json:"district"`
  	Buyerclass string `json:"buyerclass"`
  	Budget float64 `json:"budget"`
  	Bidamount float64 `json:"bidamount"`
  	MultiPackage int `json:"multipackage"`
  	MultiPackageId string `json:"multipackage_id"`
  	Nid string `json:"id_new"`

  	Winners []string

  	// pnbval counts how many of project name, project code and buyer are
  	// present.
  	pnbval int

  	// LenPC is the length of the project code, LenPN the length of the
  	// project name, and LenPTC the length of the project code extracted
  	// from the title.
  	LenPC, LenPN, LenPTC int
  }