bidingpurchasing.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. package main
  2. import (
  3. "log"
  4. "qfw/util"
  5. elastic "qfw/util/elastic"
  6. "sync"
  7. "unicode/utf8"
  8. u "./util"
  9. "gopkg.in/mgo.v2/bson"
  10. )
  11. //定时查询bidding中extract_state为2的数据生成索引
  12. func biddingPurchaingTask(q map[string]interface{}) {
  13. defer util.Catch()
  14. //锁
  15. SaveUpdageLock := sync.Mutex{}
  16. //连接参数
  17. c, _ := bidding["collect"].(string) //bidding表
  18. db, _ := bidding["db"].(string) //库
  19. index, _ := bidding["index"].(string) //索引别名
  20. itype, _ := bidding["type"].(string)
  21. //
  22. session := mgo.GetMgoConn(86400)
  23. defer mgo.DestoryMongoConn(session)
  24. count, _ := session.DB(db).C(c).Find(&q).Count()
  25. log.Println("biddingPurchaingTask: ", db, c, "查询语句:", q, "同步总数:", count, "elastic库:", index)
  26. query := session.DB(db).C(c).Find(q).Select(bson.M{
  27. "projectinfo.attachment": 0,
  28. "contenthtml": 0,
  29. }).Iter()
  30. arrEs := make([]map[string]interface{}, savesizei)
  31. arrMgo := [][]map[string]interface{}{}
  32. var n int
  33. i := 0
  34. for tmp := make(map[string]interface{}); query.Next(tmp); i = i + 1 {
  35. n++
  36. if util.IntAll(tmp["extracttype"]) == -1 { // || util.IntAll(tmp["dataging"]) == 1 { //重复数据不生索引
  37. tmp = make(map[string]interface{})
  38. continue
  39. }
  40. newTmp := map[string]interface{}{} //最终生索引的数据
  41. saveArr := []map[string]interface{}{}
  42. //oss拼装filetext
  43. if filetext := getFileText(tmp); len(filetext) > 10 {
  44. if site, _ := tmp["site"].(string); site == "中国招标投标公共服务平台" { //site:中国招标投标公共服务平台 detail替换成filetext 并加入标记filedetail=1
  45. tmp["detail"] = filetext
  46. saveArr = append(saveArr, map[string]interface{}{"_id": tmp["_id"]})
  47. saveArr = append(saveArr, map[string]interface{}{
  48. "$set": map[string]interface{}{
  49. "filedetail": 1,
  50. "detail": filetext,
  51. },
  52. })
  53. }
  54. newTmp["filetext"] = filetext
  55. }
  56. //purchasing
  57. if purchasing, ok := tmp["purchasing"].(string); ok {
  58. if len(purchasing) > 0 {
  59. newTmp["purchasing"] = tmp["purchasing"]
  60. }
  61. }
  62. //purchasinglist
  63. if purchasinglist, ok := tmp["purchasinglist"].([]interface{}); ok {
  64. util.Debug(len(purchasinglist))
  65. if len(purchasinglist) > 0 {
  66. purchasinglist_new := []map[string]interface{}{}
  67. for _, ls := range purchasinglist {
  68. lsm_new := make(map[string]interface{})
  69. lsm := ls.(map[string]interface{})
  70. for _, pf := range purchasinglistFields {
  71. if lsm[pf] != nil {
  72. lsm_new[pf] = lsm[pf]
  73. }
  74. }
  75. if lsm_new != nil && len(lsm_new) > 0 {
  76. purchasinglist_new = append(purchasinglist_new, lsm_new)
  77. }
  78. }
  79. util.Debug(len(purchasinglist_new), purchasinglist_new)
  80. if len(purchasinglist_new) > 0 {
  81. newTmp["purchasinglist"] = purchasinglist_new
  82. }
  83. }
  84. }
  85. //处理数据
  86. if tmp["supervisorrate"] != nil { //临时处理supervisorrate抽取类型为string不生索引
  87. if _, ok := tmp["supervisorrate"].(string); ok { //supervisorrate数据为string类型
  88. delete(tmp, "supervisorrate")
  89. }
  90. }
  91. //对projectscope字段的索引处理
  92. ps, _ := tmp["projectscope"].(string)
  93. if len(ps) > ESLEN {
  94. tmp["projectscope"] = string(([]rune(ps))[:4000])
  95. }
  96. SaveUpdageLock.Lock()
  97. for _, v := range biddingIndexFields { //索引字段
  98. if tmp[v] != nil {
  99. if "projectinfo" == v {
  100. mp, _ := tmp[v].(map[string]interface{})
  101. if mp != nil {
  102. newmap := map[string]interface{}{}
  103. for _, v1 := range projectinfoFields {
  104. if mp[v1] != nil {
  105. newmap[v1] = mp[v1]
  106. }
  107. }
  108. newTmp[v] = newmap
  109. // attachments := mp["attachments"]
  110. // con := ""
  111. // if attachments != nil {
  112. // am, _ := attachments.(map[string]interface{})
  113. // if am != nil {
  114. // for _, v1 := range am {
  115. // vm, _ := v1.(map[string]interface{})
  116. // if vm != nil {
  117. // c, _ := vm["content"].(string)
  118. // con += c
  119. // }
  120. // }
  121. // }
  122. // }
  123. // con = FilterDetailSpace(con)
  124. // if con != "" {
  125. // newTmp["attachments"] = con
  126. // }
  127. }
  128. } else {
  129. if v == "detail" {
  130. detail, _ := tmp[v].(string)
  131. newTmp[v] = FilterDetail(detail)
  132. } else {
  133. newTmp[v] = tmp[v]
  134. }
  135. }
  136. }
  137. }
  138. arrEs = append(arrEs, newTmp)
  139. if len(saveArr) > 0 {
  140. arrMgo = append(arrMgo, saveArr) //要更新数据
  141. }
  142. // arrMgo = append(arrMgo, []map[string]interface{}{ //要更新数据
  143. // map[string]interface{}{
  144. // "_id": tmp["_id"],
  145. // },
  146. // map[string]interface{}{
  147. // "$set": map[string]interface{}{
  148. // "extract_state": 4,
  149. // },
  150. // },
  151. // })
  152. //批量更新
  153. if len(arrMgo) >= savesizei-1 {
  154. mgo.UpdateBulkAll(db, c, arrMgo...)
  155. arrMgo = [][]map[string]interface{}{}
  156. }
  157. //生索引
  158. if len(arrEs) >= savesizei-1 {
  159. tmps := arrEs
  160. elastic.BulkSave(index, itype, &tmps, true)
  161. arrEs = []map[string]interface{}{}
  162. }
  163. SaveUpdageLock.Unlock()
  164. //计数
  165. if n%savesizei == 0 {
  166. log.Println("当前:", n)
  167. }
  168. tmp = make(map[string]interface{})
  169. }
  170. SaveUpdageLock.Lock()
  171. if len(arrMgo) > 0 {
  172. mgo.UpdateBulkAll(db, c, arrMgo...)
  173. }
  174. if len(arrEs) > 0 {
  175. tmps := arrEs
  176. elastic.BulkSave(index, itype, &tmps, true)
  177. }
  178. SaveUpdageLock.Unlock()
  179. log.Println("create filetext index...over", n)
  180. }
  181. //定时任务site:中国招标投标公共服务平台
  182. /*
  183. 注意:
  184. 1、调用此任务时config.json中indexfields配置不要有purchasing、purchasinglist、filetext
  185. */
  186. func site_attach_text(q map[string]interface{}) {
  187. defer util.Catch()
  188. //锁
  189. SaveUpdageLock := sync.Mutex{}
  190. //连接参数
  191. c, _ := bidding["collect"].(string) //bidding表
  192. db, _ := bidding["db"].(string) //库
  193. index, _ := bidding["index"].(string) //索引别名
  194. itype, _ := bidding["type"].(string)
  195. //
  196. session := mgo.GetMgoConn(86400)
  197. defer mgo.DestoryMongoConn(session)
  198. count, _ := session.DB(db).C(c).Find(&q).Count()
  199. log.Println("site_attach_text: ", db, c, "查询语句:", q, "同步总数:", count, "elastic库:", index)
  200. query := session.DB(db).C(c).Find(q).Select(bson.M{
  201. "projectinfo.attachment": 0,
  202. "contenthtml": 0,
  203. }).Iter()
  204. arrEs := make([]map[string]interface{}, savesizei)
  205. arrMgo := [][]map[string]interface{}{}
  206. var n int
  207. var indexnum int
  208. i := 0
  209. for tmp := make(map[string]interface{}); query.Next(tmp); i = i + 1 {
  210. n++
  211. //计数
  212. if n%savesizei == 0 {
  213. log.Println("当前:", n)
  214. }
  215. site, _ := tmp["site"].(string)
  216. if util.IntAll(tmp["extracttype"]) == -1 || site != "中国招标投标公共服务平台" || tmp["attach_text"] == nil {
  217. tmp = make(map[string]interface{})
  218. continue
  219. }
  220. newTmp := map[string]interface{}{} //最终生索引的数据
  221. saveArr := []map[string]interface{}{}
  222. filetext := getFileText(tmp) //oss拼装filetext
  223. if len(filetext) > 10 {
  224. tmp["detail"] = filetext //filetext替换detail
  225. saveArr = append(saveArr, map[string]interface{}{"_id": tmp["_id"]})
  226. saveArr = append(saveArr, map[string]interface{}{
  227. "$set": map[string]interface{}{
  228. "filedetail": 1,
  229. "detail": filetext,
  230. },
  231. })
  232. newTmp["filetext"] = filetext //
  233. } else {
  234. //log.Println("filetext is null string:", tmp["_id"])
  235. tmp = make(map[string]interface{})
  236. continue
  237. }
  238. indexnum++
  239. //purchasing
  240. if purchasing, ok := tmp["purchasing"].(string); ok {
  241. if len(purchasing) > 0 {
  242. newTmp["purchasing"] = tmp["purchasing"]
  243. }
  244. }
  245. //purchasinglist
  246. if purchasinglist, ok := tmp["purchasinglist"].([]interface{}); ok {
  247. if len(purchasinglist) > 0 {
  248. purchasinglist_new := []map[string]interface{}{}
  249. for _, ls := range purchasinglist {
  250. lsm_new := make(map[string]interface{})
  251. lsm := ls.(map[string]interface{})
  252. for _, pf := range purchasinglistFields {
  253. if lsm[pf] != nil {
  254. lsm_new[pf] = lsm[pf]
  255. }
  256. }
  257. if lsm_new != nil && len(lsm_new) > 0 {
  258. purchasinglist_new = append(purchasinglist_new, lsm_new)
  259. }
  260. }
  261. if len(purchasinglist_new) > 0 {
  262. newTmp["purchasinglist"] = purchasinglist_new
  263. }
  264. }
  265. }
  266. //处理数据
  267. if tmp["supervisorrate"] != nil { //临时处理supervisorrate抽取类型为string不生索引
  268. if _, ok := tmp["supervisorrate"].(string); ok { //supervisorrate数据为string类型
  269. delete(tmp, "supervisorrate")
  270. }
  271. }
  272. //对projectscope字段的索引处理
  273. ps, _ := tmp["projectscope"].(string)
  274. if len(ps) > ESLEN {
  275. tmp["projectscope"] = string(([]rune(ps))[:4000])
  276. }
  277. SaveUpdageLock.Lock()
  278. for _, v := range biddingIndexFields { //索引字段
  279. if tmp[v] != nil {
  280. if "projectinfo" == v {
  281. mp, _ := tmp[v].(map[string]interface{})
  282. if mp != nil {
  283. newmap := map[string]interface{}{}
  284. for _, v1 := range projectinfoFields {
  285. if mp[v1] != nil {
  286. newmap[v1] = mp[v1]
  287. }
  288. }
  289. newTmp[v] = newmap
  290. }
  291. } else {
  292. if v == "detail" {
  293. detail, _ := tmp[v].(string)
  294. newTmp[v] = FilterDetail(detail)
  295. } else {
  296. newTmp[v] = tmp[v]
  297. }
  298. }
  299. }
  300. }
  301. arrEs = append(arrEs, newTmp) //要生索引数据
  302. if len(saveArr) > 0 {
  303. arrMgo = append(arrMgo, saveArr) //要更新数据
  304. }
  305. //批量更新
  306. if len(arrMgo) >= savesizei-1 {
  307. mgo.UpdateBulkAll(db, c, arrMgo...)
  308. arrMgo = [][]map[string]interface{}{}
  309. }
  310. //生索引
  311. if len(arrEs) >= savesizei-1 {
  312. tmps := arrEs
  313. elastic.BulkSave(index, itype, &tmps, true)
  314. arrEs = []map[string]interface{}{}
  315. }
  316. SaveUpdageLock.Unlock()
  317. tmp = make(map[string]interface{})
  318. }
  319. SaveUpdageLock.Lock()
  320. if len(arrMgo) > 0 {
  321. mgo.UpdateBulkAll(db, c, arrMgo...)
  322. }
  323. if len(arrEs) > 0 {
  324. tmps := arrEs
  325. elastic.BulkSave(index, itype, &tmps, true)
  326. }
  327. SaveUpdageLock.Unlock()
  328. log.Println("create filetext index...over", n, indexnum)
  329. }
  330. func getFileText(tmp map[string]interface{}) (filetext string) {
  331. if attchMap, ok := tmp["attach_text"].(map[string]interface{}); attchMap != nil && ok {
  332. for _, tmpData1 := range attchMap {
  333. if tmpData2, ok := tmpData1.(map[string]interface{}); tmpData2 != nil && ok {
  334. for _, result := range tmpData2 {
  335. if resultMap, ok := result.(map[string]interface{}); resultMap != nil && ok {
  336. if attach_url := util.ObjToString(resultMap["attach_url"]); attach_url != "" {
  337. bs := u.OssGetObject(attach_url) //oss读数据
  338. if utf8.RuneCountInString(filetext+bs) < fileLength {
  339. filetext += bs + "\n"
  340. } else {
  341. break
  342. }
  343. }
  344. }
  345. }
  346. }
  347. }
  348. }
  349. return
  350. }