extpackage.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. // extpackage
  2. package extract
  3. import (
  4. "jy/clear"
  5. ju "jy/util"
  6. "log"
  7. qu "qfw/util"
  8. "reflect"
  9. "regexp"
  10. "sort"
  11. )
  12. func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask, isSite bool, codeSite string) {
  13. if pkg.ColonKV != nil {
  14. kvparse(pkg.ColonKV, e, sonJobResult, isSite, codeSite)
  15. }
  16. if pkg.TableKV != nil {
  17. kvparse(pkg.TableKV, e, sonJobResult, isSite, codeSite)
  18. }
  19. if pkg.SpaceKV != nil {
  20. kvparse(pkg.SpaceKV, e, sonJobResult, isSite, codeSite)
  21. }
  22. }
  23. func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{}, isSite bool, codeSite string) {
  24. if p != nil {
  25. for pk, pv2 := range p.KvTags {
  26. if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额") {
  27. tmp := []*ju.Tag{}
  28. var tmpindex, tmpweight int = -9999, -9999
  29. for ii, vv := range pv2 {
  30. if pk == "中标单位" && regexp.MustCompile("[0-9.元人¥$]").MatchString(vv.Value){
  31. continue
  32. }
  33. if tmpweight < vv.Weight {
  34. tmpindex = ii
  35. tmpweight = vv.Weight
  36. }
  37. }
  38. tmp = append(tmp, pv2[tmpindex])
  39. p.KvTags[pk] = tmp
  40. }
  41. }
  42. for pk, pv := range p.KvTags {
  43. if len(pv) == 0 {
  44. continue
  45. }
  46. tags := ju.GetTags(pk, isSite, codeSite)
  47. if tags.Len() > 0 {
  48. if ((*sonJobResult)["name"] == nil || (*sonJobResult)["name"] == "") && tags[0].Key == "项目名称" {
  49. (*sonJobResult)["name"] = pv[0].Value
  50. }
  51. if qu.Float64All((*sonJobResult)["budget"]) == 0 && tags[0].Key == "预算" {
  52. lock.Lock()
  53. cfn := e.ClearFn["budget"]
  54. lock.Unlock()
  55. data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
  56. (*sonJobResult)["budget"] = data[0]
  57. continue
  58. }
  59. if qu.Float64All((*sonJobResult)["bidamount"]) == 0 && tags[0].Key == "中标金额" {
  60. lock.Lock()
  61. cfn := e.ClearFn["budget"]
  62. lock.Unlock()
  63. data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
  64. (*sonJobResult)["bidamount"] = data[0]
  65. continue
  66. }
  67. if ((*sonJobResult)["winner"] == nil || (*sonJobResult)["winner"] == "") && tags[0].Key == "中标单位" {
  68. (*sonJobResult)["winner"] = pv[0].Value
  69. }
  70. }
  71. if (*sonJobResult)["name"] == nil && pk == "名称" {
  72. (*sonJobResult)["name"] = pv[0].Value
  73. }
  74. }
  75. }
  76. }
  77. //处理分包信息
  78. func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
  79. qu.Try(func() {
  80. if len(j.BlockPackage) > 0 {
  81. for _, ev := range e.PkgRuleCores {
  82. for _, eve := range ev.RuleCores {
  83. if !eve.IsLua {
  84. ExtRuleCoreByPkgReg(j, eve, e) // 分包正则抽取 预算 中标单位 中标价 成交状态
  85. }
  86. }
  87. for _, evb := range ev.RuleBacks {
  88. if !evb.IsLua {
  89. ExtRegBackPkg(j, evb) // 分包正则清理 中标单位 成交状态 内容 名称
  90. }
  91. }
  92. }
  93. tmpkeys := []string{}
  94. for k, _ := range j.BlockPackage {
  95. if k == "" {
  96. continue
  97. }
  98. tmpkeys = append(tmpkeys, k)
  99. }
  100. sort.Strings(tmpkeys)
  101. packageResult := map[string]map[string]interface{}{}
  102. //packagenum := len(j.BlockPackage)
  103. for _, pkName := range tmpkeys {
  104. pkg, ok := j.BlockPackage[pkName]
  105. if !ok {
  106. continue
  107. }
  108. //是否清理标记
  109. clearmap := map[string]bool{}
  110. sonJobResult := map[string]interface{}{}
  111. if pkg != nil {
  112. sonJobResult["origin"] = pkg.Origin
  113. sonJobResult["text"] = pkg.Text
  114. sonJobResult["budget"] = pkg.Budget
  115. sonJobResult["bidamount"] = pkg.Bidamount
  116. if pkg.Winner == "" && len(j.Winnerorder) > 0 {
  117. if sonJobResult["winnerorder"] == nil {
  118. sonJobResult["winnerorder"] = j.Winnerorder
  119. if sonJobResult["bidamount"].(float64) <= 0 {
  120. sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
  121. }
  122. if sonJobResult["winner"] == "" {
  123. sonJobResult["winner"] = j.Winnerorder[0]["entname"]
  124. }
  125. }
  126. } else {
  127. if len(j.Winnerorder) > 0 {
  128. sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
  129. sonJobResult["winner"] = j.Winnerorder[0]["entname"]
  130. }
  131. sonJobResult["winnerorder"] = pkg.WinnerOrder
  132. }
  133. pkvdata(pkg, &sonJobResult, e, isSite, codeSite)
  134. sonJobResult["type"] = pkg.Type
  135. if len(tmpkeys) == 1 {
  136. if qu.Float64All(sonJobResult["budget"]) == 0 {
  137. for _, bv := range j.Block {
  138. kvparse(bv.ColonKV, e, &sonJobResult, isSite, codeSite)
  139. kvparse(bv.TableKV, e, &sonJobResult, isSite, codeSite)
  140. kvparse(bv.SpaceKV, e, &sonJobResult, isSite, codeSite)
  141. }
  142. }
  143. }
  144. if sonJobResult["name"] == nil {
  145. sonJobResult["name"] = j.Title
  146. }
  147. }
  148. //分包暂不参与选举
  149. /*
  150. for k, tags := range e.Tag {
  151. L:
  152. for _, tag := range tags {
  153. if pkg.TableKV != nil {
  154. for key, val := range pkg.TableKV.Kv {
  155. if tag.Key == key {
  156. clearmap[k] = false
  157. var tmpval interface{}
  158. if len(e.ClearFn[k]) > 0 {
  159. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  160. tmpval = data[0]
  161. } else {
  162. tmpval = val
  163. }
  164. sonJobResult[k] = tmpval
  165. if packagenum == 1 {
  166. field := &ju.ExtField{
  167. Field: k,
  168. Code: "package",
  169. RuleText: "package",
  170. Type: "table",
  171. MatchType: "tag_string",
  172. ExtFrom: "package",
  173. Value: tmpval,
  174. Score: 0,
  175. }
  176. j.Result[k] = append(j.Result[k], field)
  177. }
  178. break L
  179. }
  180. }
  181. }
  182. if pkg.ColonKV != nil {
  183. for key, val := range pkg.ColonKV.Kv {
  184. if tag.Key == key {
  185. clearmap[k] = true
  186. var tmpval interface{}
  187. if len(e.ClearFn[k]) > 0 {
  188. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  189. tmpval = data[0]
  190. } else {
  191. tmpval = val
  192. }
  193. sonJobResult[k] = tmpval
  194. if packagenum == 1 {
  195. field := &ju.ExtField{
  196. Field: k,
  197. Code: "package",
  198. RuleText: "package",
  199. Type: "colon",
  200. MatchType: "tag_string",
  201. ExtFrom: "package",
  202. Value: tmpval,
  203. Score: 0,
  204. }
  205. j.Result[k] = append(j.Result[k], field)
  206. }
  207. break L
  208. }
  209. }
  210. }
  211. if pkg.SpaceKV != nil {
  212. for key, val := range pkg.SpaceKV.Kv {
  213. if tag.Key == key {
  214. clearmap[k] = true
  215. var tmpval interface{}
  216. if len(e.ClearFn[k]) > 0 {
  217. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  218. tmpval = data[0]
  219. } else {
  220. tmpval = val
  221. }
  222. sonJobResult[k] = tmpval
  223. if packagenum == 1 {
  224. field := &ju.ExtField{
  225. Field: k,
  226. Code: "package",
  227. RuleText: "package",
  228. Type: "space",
  229. MatchType: "tag_string",
  230. ExtFrom: "package",
  231. Value: tmpval,
  232. Score: 0,
  233. }
  234. j.Result[k] = append(j.Result[k], field)
  235. }
  236. break L
  237. }
  238. }
  239. }
  240. }
  241. }
  242. */
  243. //如果有中标候选人排序,优先用第一中标候选人的中标单位和中标金额覆盖该包里面相应的字段的值
  244. if pkg.WinnerOrder != nil && len(pkg.WinnerOrder) > 0 {
  245. firstWinnerOrder := pkg.WinnerOrder[0]
  246. if qu.ObjToString(sonJobResult["winner"]) == "" || (!pkg.Accuracy && qu.ObjToString(firstWinnerOrder["entname"]) != "" && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
  247. sonJobResult["winner"] = firstWinnerOrder["entname"]
  248. }
  249. if qu.Float64All(sonJobResult["bidamount"]) == 0 || (!pkg.Accuracy && qu.Float64All(firstWinnerOrder["price"]) > 0 && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
  250. sonJobResult["bidamount"] = firstWinnerOrder["price"]
  251. }
  252. }
  253. //log.Println(pkName, sonJobResult)
  254. sonJobResult["clear"] = clearmap
  255. packageResult[pkName] = sonJobResult
  256. }
  257. if len(packageResult) > 0 {
  258. j.PackageInfo = packageResult
  259. }
  260. }
  261. //extRegBackPack(j, e)
  262. }, func(err interface{}) {
  263. log.Println("PackageDetail err", err)
  264. })
  265. }
  266. //清理分包信息
  267. func extRegBackPack(j *ju.Job, e *ExtractTask) {
  268. defer qu.Catch()
  269. //正则清理
  270. if j.CategorySecond == "" {
  271. for _, rc1 := range e.RuleCores[j.Category] {
  272. for _, rc := range rc1 {
  273. for pk, pack := range j.PackageInfo {
  274. clear, _ := pack["clear"].(map[string]interface{})
  275. for k, val := range pack {
  276. if b, ok := clear[k].(bool); ok && b {
  277. if rc.Field == k {
  278. text := qu.ObjToString(val)
  279. for _, in := range rc.RuleBacks {
  280. if text != "" && !in.IsLua {
  281. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  282. }
  283. }
  284. pack[k] = text
  285. }
  286. }
  287. }
  288. j.PackageInfo[pk] = pack
  289. }
  290. }
  291. }
  292. } else {
  293. for _, rc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
  294. for _, rc := range rc1 {
  295. for pk, pack := range j.PackageInfo {
  296. clear, _ := pack["clear"].(map[string]interface{})
  297. for k, val := range pack {
  298. if b, ok := clear[k].(bool); ok && b {
  299. if rc.Field == k {
  300. text := qu.ObjToString(val)
  301. for _, in := range rc.RuleBacks {
  302. if text != "" && !in.IsLua {
  303. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  304. }
  305. }
  306. pack[k] = text
  307. }
  308. }
  309. }
  310. j.PackageInfo[pk] = pack
  311. }
  312. }
  313. }
  314. }
  315. //通用正则清理
  316. for _, in := range e.RuleBacks {
  317. for _, pack := range j.PackageInfo {
  318. for k, val := range pack {
  319. if in.Field == k {
  320. text := qu.ObjToString(val)
  321. if text != "" && !in.IsLua {
  322. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  323. }
  324. pack[k] = text
  325. }
  326. }
  327. }
  328. }
  329. //函数清理
  330. for _, pack := range j.PackageInfo {
  331. for key, val := range pack {
  332. if reflect.TypeOf(val) != nil && (reflect.TypeOf(val).String() == "float64" || reflect.TypeOf(val).String() == "int64") {
  333. continue
  334. } else {
  335. lock.Lock()
  336. cfn := e.ClearFn[key]
  337. lock.Unlock()
  338. data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
  339. pack[key] = data[0]
  340. }
  341. }
  342. }
  343. //特殊属性的分包清理
  344. for _, rc := range e.PkgRuleCores {
  345. for pk, pack := range j.PackageInfo {
  346. for k, val := range pack {
  347. if rc.Field == k {
  348. text := qu.ObjToString(val)
  349. for _, in := range rc.RuleBacks {
  350. if text != "" {
  351. if !in.IsLua { //正则
  352. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  353. } else { //lua
  354. result := GetResultMapForLua(j)
  355. lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText}
  356. if j != nil {
  357. lua.Block = j.Block
  358. }
  359. extinfo := lua.RunScript("back")
  360. if extinfo["value"] != nil {
  361. text = qu.ObjToString(extinfo["value"])
  362. }
  363. }
  364. }
  365. }
  366. pack[k] = text
  367. }
  368. }
  369. j.PackageInfo[pk] = pack
  370. }
  371. }
  372. }