extension.go 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. package extract
  2. import (
  3. "data_ai/clean"
  4. "data_ai/prompt"
  5. "data_ai/ul"
  6. log "github.com/donnie4w/go-logger/logger"
  7. qu "jygit.jydev.jianyu360.cn/data_processing/common_utils"
  8. "strings"
  9. "unicode/utf8"
  10. )
  11. // 确认抽取范围
  12. func ConfrimExtractInfo(q map[string]interface{}) map[string]interface{} {
  13. dict := map[string]interface{}{}
  14. sess := ul.SourceMgo.GetMgoConn()
  15. defer ul.SourceMgo.DestoryMongoConn(sess)
  16. total := 0
  17. it := sess.DB(ul.SourceMgo.DbName).C(ul.Ext_Name).Find(&q).Select(map[string]interface{}{"_id": 1, "ai_zhipu": 1}).Iter()
  18. for tmp := make(map[string]interface{}); it.Next(&tmp); total++ {
  19. if total%1000 == 0 {
  20. log.Debug("cur index ", total)
  21. }
  22. if tmp["ai_zhipu"] == nil { //已经识别的数据-不再识别
  23. tmpid := ul.BsonTOStringId(tmp["_id"])
  24. dict[tmpid] = tmpid
  25. }
  26. tmp = make(map[string]interface{})
  27. }
  28. return dict
  29. }
  30. // 获取附件名字信息
  31. func getpnsinfo(tmp map[string]interface{}) []string {
  32. arr := []string{}
  33. if projectinfo := qu.ObjToMap(tmp["projectinfo"]); projectinfo != nil {
  34. if attachments := qu.ObjToMap((*projectinfo)["attachments"]); attachments != nil {
  35. for _, v := range *attachments {
  36. if info := qu.ObjToMap(v); info != nil {
  37. if filename := qu.ObjToString((*info)["filename"]); filename != "" {
  38. arr = append(arr, filename)
  39. }
  40. }
  41. }
  42. }
  43. }
  44. return arr
  45. }
  46. // 获取正文数据
  47. func getDetailText(v map[string]interface{}, tmpid string) string {
  48. detail := qu.ObjToString(v["detail"])
  49. if ul.IsTool {
  50. detail = qu.ObjToString(v["details"])
  51. filetext := qu.ObjToString(v["filetext"])
  52. if utf8.RuneCountInString(detail) < 100 && filetext != "" {
  53. detail = filetext
  54. }
  55. } else {
  56. //if bs := ul.OssGetObject(tmpid); bs != "" {
  57. // detail = bs
  58. //}
  59. }
  60. return detail
  61. }
  62. // 获取标的物-过滤产权-拟建
  63. func getPurList(v map[string]interface{}, detail string, f_info map[string]interface{}) []map[string]interface{} {
  64. if qu.ObjToString(v["toptype"]) == "拟建" || qu.ObjToString(v["toptype"]) == "产权" {
  65. return []map[string]interface{}{}
  66. }
  67. p_data := map[string]interface{}{}
  68. p_data["detail"] = qu.ObjToString(v["title"]) + "\n" + detail
  69. p_data["site"] = v["site"]
  70. p_data["attach_text"] = v["attach_text"]
  71. p_data["toptype"] = v["toptype"]
  72. if f_info["s_toptype"] != nil {
  73. p_data["toptype"] = f_info["s_toptype"]
  74. }
  75. if p_info := ul.PostPurchasingList(p_data); len(p_info) > 0 {
  76. if qu.IntAll(p_info["status"]) == 200 {
  77. p_list := ul.IsMarkInterfaceMap(p_info["purchasinglist"])
  78. return p_list
  79. }
  80. }
  81. return []map[string]interface{}{}
  82. }
  83. /*
  84. ****************************************
  85. ****************************************
  86. ****************************************
  87. */
  88. // 过滤信息规则···
  89. func NotInProgressInfo(title string, detail string, v map[string]interface{}) bool {
  90. dl := utf8.RuneCountInString(detail) //文本长度
  91. if strings.Contains(title, "开标记录") || v["jyfb_data"] != nil {
  92. return true
  93. }
  94. if dl <= 20 || (dl <= 50 && ul.SpecialTextReg.MatchString(detail)) {
  95. return true
  96. }
  97. return false
  98. }
  99. // 二次校验采购单位
  100. func CheckOutBuyerInfo(f_data map[string]interface{}) {
  101. if s_buyer := qu.ObjToString(f_data["s_buyer"]); s_buyer != "" {
  102. if zp_buyer := prompt.AcquireBuyerInfo(s_buyer); zp_buyer["实体单位"] != nil {
  103. if ns_buyer := clean.CleanBuyer(qu.ObjToString(zp_buyer["实体单位"])); ns_buyer != "" {
  104. f_data["s_buyer"] = ns_buyer
  105. }
  106. }
  107. }
  108. }
  109. // 合并字段
  110. func MergeInfo(infos []map[string]interface{}) map[string]interface{} {
  111. info := map[string]interface{}{}
  112. for _, v := range infos {
  113. for k1, v1 := range v {
  114. info[k1] = v1
  115. }
  116. }
  117. return info
  118. }
  119. // 强制逻辑判断数据
  120. func ForcedLogicDecideInfo(f_data map[string]interface{}) {
  121. //原则大模型
  122. //多单位不能一致
  123. s_buyer := qu.ObjToString(f_data["s_buyer"])
  124. s_winner := qu.ObjToString(f_data["s_winner"])
  125. if s_buyer == s_winner && s_buyer != "" {
  126. /*
  127. 1、若单位名称-不含公司保留采购单位
  128. 2、若单位名称-含公司保留中标单位
  129. */
  130. if strings.Contains(s_buyer, "公司") {
  131. f_data["s_buyer"] = ""
  132. } else {
  133. f_data["s_winner"] = ""
  134. }
  135. }
  136. //代理机构
  137. if s_agency := qu.ObjToString(f_data["s_agency"]); s_agency != "" {
  138. if s_agency == s_buyer || s_agency == s_winner {
  139. f_data["s_agency"] = ""
  140. }
  141. }
  142. }