util.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. package main
import (
	"encoding/json"
	"fmt"
	"log"
	"reflect"
	"sort"
	"strconv"
	"strings"
	"time"

	elastic "app.yhyue.com/moapp/jybase/es"
	"go.mongodb.org/mongo-driver/bson/primitive"
	u "jygit.jydev.jianyu360.cn/data_processing/common_utils"
	"jygit.jydev.jianyu360.cn/data_processing/common_utils/redis"
)
  15. func InitEs() {
  16. Es = elastic.NewEs("v7", conf.Config.Es.Addr, conf.Config.Es.Size, conf.Config.Es.User, conf.Config.Es.Password)
  17. }
  18. // @Description subscopeclass、topscopeclass、package
  19. // 20230523 多包处理 subpackage = 1
  20. // @Author J 2022/6/7 5:54 PM
  21. func fieldFun(data map[string]interface{}) {
  22. // 附件重采,数据同步时不更新判重标识(正常抽取判重)
  23. if u.IntAll(data["repeat"]) == 1 {
  24. data["extracttype"] = -1
  25. //update["repeat_id"] = compare["repeat_id"]
  26. } else {
  27. data["extracttype"] = 1
  28. }
  29. //未抽取、判重处理
  30. if u.IntAll(data["dataging"]) == 1 { //修改未抽取的bidding数据的dataging
  31. data["dataging"] = 0 //
  32. }
  33. if subscopeclass, ok := data["subscopeclass"].(primitive.A); ok && len(subscopeclass) > 0 { //subscopeclass
  34. m1 := map[string]bool{}
  35. var newclass []string
  36. for _, sc := range subscopeclass {
  37. sclass, _ := sc.(string)
  38. if !m1[sclass] {
  39. m1[sclass] = true
  40. newclass = append(newclass, sclass)
  41. }
  42. }
  43. //data["subscopeclass"] = newclass
  44. data["s_subscopeclass"] = strings.Join(newclass, ",")
  45. }
  46. if topscopeclass, ok := data["topscopeclass"].(primitive.A); ok && len(topscopeclass) > 0 { //topscopeclass
  47. m2 := map[string]bool{}
  48. var newclass []string
  49. for _, tc := range topscopeclass {
  50. tclass, _ := tc.(string)
  51. tclass = regLetter.ReplaceAllString(tclass, "") // 去除字母
  52. if !m2[tclass] {
  53. m2[tclass] = true
  54. newclass = append(newclass, tclass)
  55. }
  56. }
  57. //data["topscopeclass"] = topscopeclass
  58. data["s_topscopeclass"] = strings.Join(newclass, ",")
  59. }
  60. //package
  61. if package1 := data["package"]; package1 != nil {
  62. packageM, _ := package1.(map[string]interface{})
  63. //data["package"] = packageM
  64. for _, p := range packageM {
  65. pm, _ := p.(map[string]interface{})
  66. if u.ObjToString(pm["winner"]) != "" || u.Float64All(pm["budget"]) > 0 || u.Float64All(pm["bidamount"]) > 0 {
  67. data["multipackage"] = 1
  68. break
  69. }
  70. }
  71. } else {
  72. data["multipackage"] = 0
  73. }
  74. // subpackage
  75. if data["package"] != nil && data["s_winner"] != nil && data["bidamount"] != nil {
  76. if pg, ok := data["package"].(map[string]interface{}); ok && len(pg) > 1 {
  77. var bmt []float64
  78. var swn []string
  79. for _, p := range pg {
  80. p1 := p.(map[string]interface{})
  81. if p1["bidamount"] != nil {
  82. bmt = append(bmt, u.Float64All(p1["bidamount"]))
  83. }
  84. if w := u.ObjToString(p1["winner"]); w != "" {
  85. swn = append(swn, w)
  86. }
  87. }
  88. if len(bmt) > 1 && len(swn) > 1 {
  89. sn := strings.Split(u.ObjToString(data["s_winner"]), ",")
  90. sort.Strings(sn)
  91. sort.Strings(swn)
  92. swn1 := u.ObjArrToStringArr(Duplicate(swn)) // 去重
  93. if strings.Join(swn1, ",") == strings.Join(sn, ",") {
  94. bidamount := 0.0
  95. for _, f := range bmt {
  96. bidamount += f
  97. }
  98. if bidamount == u.Float64All(data["bidamount"]) {
  99. data["subpackage"] = 1
  100. }
  101. }
  102. }
  103. }
  104. }
  105. }
  106. // Duplicate
  107. // @Description 去重
  108. // @Author J 2023/5/24 09:53
  109. func Duplicate(a interface{}) (ret []interface{}) {
  110. va := reflect.ValueOf(a)
  111. for i := 0; i < va.Len(); i++ {
  112. if i > 0 && reflect.DeepEqual(va.Index(i-1).Interface(), va.Index(i).Interface()) {
  113. continue
  114. }
  115. ret = append(ret, va.Index(i).Interface())
  116. }
  117. return ret
  118. }
  119. // @Description entidlist
  120. // @Author J 2022/6/7 2:36 PM
  121. func companyFun(s_winner string) (cid []string) {
  122. sWinnerarr := strings.Split(s_winner, ",")
  123. for _, w := range sWinnerarr {
  124. if w != "" {
  125. id := redis.GetStr("qyxy_id", w)
  126. if id == "" {
  127. ents, _ := MgoQ.Find(conf.Config.MongodbQ.Coll, map[string]interface{}{"company_name": w}, map[string]interface{}{"updatetime": -1}, map[string]interface{}{"company_name": 1}, false, -1, -1)
  128. if len(*ents) > 0 {
  129. id = u.ObjToString((*ents)[0]["_id"])
  130. redis.PutCKV("qyxy_id", w, id)
  131. } else {
  132. ent, _ := MgoP.FindOne(conf.Config.MongodbP.Coll, map[string]interface{}{"history_name": w})
  133. if len(*ent) > 0 {
  134. id = u.ObjToString((*ent)["company_id"])
  135. redis.PutCKV("qyxy_id", w, id)
  136. }
  137. }
  138. }
  139. if id == "" {
  140. id = "-"
  141. }
  142. cid = append(cid, id)
  143. }
  144. }
  145. return cid
  146. }
  147. // @Description update 修改bidding表,extractM修改抽取表
  148. // @Author J 2022/6/10 10:29 AM
  149. func typeFunc(data map[string]interface{}) {
  150. if jyData, ok := data["jyfb_data"].(map[string]interface{}); ok {
  151. if t := u.ObjToString(jyData["type"]); t != "" {
  152. switch t {
  153. //case "采购信息":
  154. case "招标公告":
  155. if u.ObjToString(data["toptype"]) != "招标" {
  156. data["toptype"] = "招标"
  157. delete(data, "subtype")
  158. }
  159. case "采购意向":
  160. if u.ObjToString(data["toptype"]) != "采购意向" {
  161. data["toptype"] = "采购意向"
  162. data["subtype"] = "采购意向"
  163. }
  164. case "招标预告":
  165. if u.ObjToString(data["toptype"]) != "预告" {
  166. data["toptype"] = "预告"
  167. delete(data, "subtype")
  168. }
  169. case "招标结果":
  170. if u.ObjToString(data["toptype"]) != "结果" {
  171. data["toptype"] = "结果"
  172. delete(data, "subtype")
  173. }
  174. }
  175. }
  176. }
  177. }
  178. // @Description 附件有效字段(isValidFile)
  179. // @Author J 2022/7/8 14:41
  180. func validFile(tmp map[string]interface{}) int {
  181. isContinue := false
  182. if pinfo, o := tmp["projectinfo"].(map[string]interface{}); o {
  183. if atts, o1 := pinfo["attachments"].(map[string]interface{}); o1 {
  184. for _, att := range atts {
  185. if att == nil {
  186. continue
  187. }
  188. if reflect.TypeOf(att).String() == "string" {
  189. continue
  190. }
  191. att1 := att.(map[string]interface{})
  192. if fid := u.ObjToString(att1["fid"]); fid != "" {
  193. isContinue = true
  194. break
  195. }
  196. }
  197. if isContinue {
  198. if attachTxt, o := tmp["attach_text"].(map[string]interface{}); o {
  199. if len(attachTxt) > 0 {
  200. for _, at := range attachTxt {
  201. if at1, ok := at.(map[string]interface{}); ok && len(at1) > 0 {
  202. for k, _ := range at1 {
  203. if reflect.TypeOf(at1[k]).String() == "string" {
  204. continue
  205. }
  206. at2 := at1[k].(map[string]interface{})
  207. s := strings.ToLower(u.ObjToString(at2["file_name"]))
  208. //和王江含确认,此处判断老版本存在问题,正确为排除jpg、jpeg、png、pdf类型(2024-03-12)
  209. //if !strings.Contains(s, "jpg") || !strings.Contains(s, "jpeg") != strings.Contains(s, "png") || strings.Contains(s, "pdf") {
  210. if !filterFileType.MatchString(s) {
  211. if strings.Contains(s, "swf") || strings.Contains(s, "html") {
  212. return -1
  213. } else if AnalysisFile(OssGetObject(u.ObjToString(at2["attach_url"]))) {
  214. return 1
  215. }
  216. }
  217. }
  218. break
  219. } else {
  220. break
  221. }
  222. }
  223. }
  224. }
  225. flag := false
  226. for _, att := range atts {
  227. if att == nil {
  228. continue
  229. }
  230. if reflect.TypeOf(att).String() == "string" {
  231. continue
  232. }
  233. att1 := att.(map[string]interface{})
  234. if fid := u.ObjToString(att1["fid"]); fid != "" {
  235. ftype := strings.ToLower(u.ObjToString(tmp["ftype"]))
  236. if ftype != "swf" && ftype != "html" && OssObjExists("jy-datafile", fid) {
  237. return 1
  238. } else {
  239. flag = true
  240. }
  241. }
  242. }
  243. if flag {
  244. return -1
  245. }
  246. }
  247. }
  248. }
  249. return 0
  250. }
  251. var DateTimeSelect = []string{"bidopentime", "bidendtime", "signaturedate", "comeintime"}
  252. // @Description 发布时间处理
  253. // @Author J 2023/5/23 14:32
  254. func methodPb(tmp map[string]interface{}) {
  255. if tmp["ext_publishtime"] != nil {
  256. if newPb := u.Int64All(tmp["ext_publishtime"]); newPb < time.Now().Unix() && newPb > 1420041600 {
  257. tmp["publishtime"] = newPb
  258. return
  259. }
  260. }
  261. for _, d := range DateTimeSelect {
  262. if tmp[d] != nil && u.Int64All(tmp[d]) < time.Now().Unix() {
  263. tmp["publishtime"] = u.Int64All(tmp[d])
  264. return
  265. }
  266. }
  267. return
  268. }
  269. // @Description 获取情报标签
  270. // @Author 徐志恒 2024/2/21 09:53
  271. func getTagSet(data map[string]interface{}) map[string]map[string]interface{} {
  272. tagSet := map[string]map[string]interface{}{}
  273. wuye := map[string]interface{}{}
  274. buyer := u.ObjToString(data["buyer"])
  275. publishtime := u.Int64All(data["publishtime"])
  276. bidamount := u.Float64All(data["bidamount"])
  277. wuye["isfirsthand"] = 62
  278. if buyer != "" {
  279. sql := `{
  280. "query": {
  281. "bool": {
  282. "must": [
  283. {
  284. "term": {
  285. "buyer": "` + buyer + `"
  286. }
  287. },
  288. {
  289. "term": {
  290. "tag_topinformation": "情报_物业"
  291. }
  292. },
  293. {
  294. "term": {
  295. "subtype": "合同"
  296. }
  297. },
  298. {
  299. "range": {
  300. "publishtime": {
  301. "lte": ` + fmt.Sprint(publishtime) + `
  302. }
  303. }
  304. }
  305. ]
  306. }
  307. },
  308. "sort": {
  309. "publishtime": "asc"
  310. },
  311. "_source": [
  312. "s_winner"
  313. ],
  314. "size": 10000
  315. }`
  316. data := Es.Get("bidding", "bidding", sql)
  317. if data != nil && len(*data) > 0 {
  318. count := 0
  319. first := u.ObjToString((*data)[0]["s_winner"])
  320. for k, v := range *data {
  321. winner := u.ObjToString(v["s_winner"])
  322. if k > 0 && first != winner {
  323. first = winner
  324. count++
  325. }
  326. }
  327. changehand := fmt.Sprintf("%.2f", float64(count)/float64(len(*data)))
  328. changehands, _ := strconv.ParseFloat(changehand, 64)
  329. wuye["changehand"] = changehands
  330. log.Println("changehands", count, len(*data), changehands)
  331. if changehands > 0.3 {
  332. wuye["changehandindex"] = 61
  333. }
  334. if len(*data) > 1 {
  335. wuye["isfirsthand"] = 0
  336. }
  337. }
  338. }
  339. if data["projectinfo"] != nil {
  340. projectInfo := u.ObjToMap(data["projectinfo"])
  341. if projectInfo != nil && len(*projectInfo) > 0 {
  342. if (*projectInfo)["attachments"] != nil {
  343. wuye["isfile"] = 63
  344. }
  345. }
  346. }
  347. wuye["period"] = getperiod(data)
  348. wuye["scale"] = getBidamountRange(bidamount)
  349. if data["property_form"] != nil {
  350. property_form := u.ObjArrToStringArr(data["property_form"].([]interface{}))
  351. wuye["property_form"] = getpropertyform(property_form)
  352. }
  353. tagSet["wuye"] = wuye
  354. return tagSet
  355. }
  356. func getBidamountRange(value float64) int {
  357. switch {
  358. case value < 500000:
  359. return 1
  360. case value >= 500000 && value < 1000000:
  361. return 2
  362. case value >= 1000000 && value < 2000000:
  363. return 3
  364. case value >= 2000000 && value < 5000000:
  365. return 4
  366. default:
  367. return 5
  368. }
  369. }
  370. func getpropertyform(value []string) string {
  371. arr := []string{}
  372. categories := map[string]string{
  373. "住宅": "21",
  374. "政府办公楼": "22",
  375. "学校": "23",
  376. "医院": "24",
  377. "产业园区": "25",
  378. "旅游景区": "26",
  379. "交通运输": "27",
  380. "商务办公楼": "28",
  381. "酒店": "29",
  382. }
  383. for _, v := range value {
  384. if categories[v] != "" {
  385. arr = append(arr, categories[v])
  386. }
  387. }
  388. return strings.Join(arr, ",")
  389. }
  390. func getperiod(data map[string]interface{}) int {
  391. res := 16
  392. signaturedate := u.Int64All(data["signaturedate"]) //合同签订日期
  393. expiredate := u.Int64All(data["expiredate"]) //合同截止日期
  394. // contractperiod := util.ObjToString(data["contractperiod"]) //合同期限
  395. project_duration := u.IntAll(data["project_duration"]) //工期时长
  396. project_timeunit := u.ObjToString(data["project_timeunit"]) //工期单位
  397. result := float64(0)
  398. if expiredate > 0 && signaturedate > 0 {
  399. result = calculateYearDifference(signaturedate, expiredate)
  400. } else if project_duration > 0 && project_timeunit != "" {
  401. if strings.Contains(project_timeunit, "年") {
  402. if project_duration == 1 {
  403. res = 12
  404. } else if project_duration == 2 {
  405. res = 13
  406. } else if project_duration == 3 {
  407. res = 14
  408. } else if project_duration == 5 {
  409. res = 15
  410. }
  411. return res
  412. } else if strings.Contains(project_timeunit, "月") {
  413. result = float64(project_duration) / 12
  414. } else if strings.Contains(project_timeunit, "周") {
  415. result = float64(project_duration) * 7 / 365
  416. } else if strings.Contains(project_timeunit, "日") || strings.Contains(project_timeunit, "天") {
  417. result = float64(project_duration) / 365
  418. }
  419. }
  420. if result == 0 {
  421. res = 16
  422. } else if result < 1 {
  423. res = 11
  424. } else if result >= 1 && result < 2 {
  425. res = 12
  426. } else if result >= 2 && result < 3 {
  427. res = 13
  428. } else if result >= 3 && result < 4 {
  429. res = 14
  430. } else if result >= 5 {
  431. res = 15
  432. }
  433. return res
  434. }
  435. func calculateYearDifference(startTime int64, endTime int64) float64 {
  436. start := time.Unix(startTime, 0)
  437. end := time.Unix(endTime, 0)
  438. duration := end.Sub(start)
  439. years := duration.Hours() / 24 / 365
  440. return years
  441. }