extpackage.go 14 KB


  1. // extpackage
  2. package extract
  3. import (
  4. "jy/clear"
  5. ju "jy/util"
  6. "log"
  7. qu "qfw/util"
  8. "reflect"
  9. "regexp"
  10. "sort"
  11. )
  12. func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask, isSite bool, codeSite string) {
  13. if pkg.ColonKV != nil {
  14. kvparse(pkg.ColonKV, e, sonJobResult, isSite, codeSite)
  15. }
  16. if pkg.TableKV != nil {
  17. kvparse(pkg.TableKV, e, sonJobResult, isSite, codeSite)
  18. }
  19. if pkg.SpaceKV != nil {
  20. kvparse(pkg.SpaceKV, e, sonJobResult, isSite, codeSite)
  21. }
  22. }
  23. func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{}, isSite bool, codeSite string) {
  24. if p != nil {
  25. for pk, pv2 := range p.KvTags {
  26. if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额") {
  27. tmp := []*ju.Tag{}
  28. var tmpindex, tmpweight int = -9999, -9999
  29. for ii, vv := range pv2 {
  30. if pk == "中标单位" && regexp.MustCompile("[0-9.元人¥$]").MatchString(vv.Value) {
  31. continue
  32. }
  33. if tmpweight < vv.Weight {
  34. tmpindex = ii
  35. tmpweight = vv.Weight
  36. }
  37. }
  38. if tmpindex == -9999 {
  39. continue
  40. }
  41. tmp = append(tmp, pv2[tmpindex])
  42. p.KvTags[pk] = tmp
  43. }
  44. }
  45. for pk, pv := range p.KvTags {
  46. if len(pv) == 0 {
  47. continue
  48. }
  49. tags := ju.GetTags(pk, isSite, codeSite)
  50. if tags.Len() > 0 {
  51. if ((*sonJobResult)["name"] == nil || (*sonJobResult)["name"] == "") && tags[0].Key == "项目名称" {
  52. (*sonJobResult)["name"] = pv[0].Value
  53. }
  54. if qu.Float64All((*sonJobResult)["budget"]) == 0 && tags[0].Key == "预算" {
  55. lock.Lock()
  56. cfn := e.ClearFn["budget"]
  57. lock.Unlock()
  58. data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
  59. if data[0] == 0 {
  60. if istrue, ok := data[len(data)-1].(bool); istrue && ok {
  61. (*sonJobResult)["budget"] = data[0]
  62. }
  63. }
  64. continue
  65. }
  66. if qu.Float64All((*sonJobResult)["bidamount"]) == 0 && tags[0].Key == "中标金额" {
  67. lock.Lock()
  68. cfn := e.ClearFn["bidamount"]
  69. lock.Unlock()
  70. data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
  71. if istrue, ok := data[len(data)-1].(bool); istrue && ok {
  72. (*sonJobResult)["bidamount"] = data[0]
  73. }
  74. continue
  75. }
  76. if ((*sonJobResult)["winner"] == nil || (*sonJobResult)["winner"] == "") && tags[0].Key == "中标单位" {
  77. if winnerorderNotReg.MatchString(pv[0].Value) {
  78. continue
  79. }
  80. (*sonJobResult)["winner"] = pv[0].Value
  81. }
  82. }
  83. if (*sonJobResult)["name"] == nil && pk == "名称" {
  84. (*sonJobResult)["name"] = pv[0].Value
  85. }
  86. }
  87. }
  88. }
  // winnerorderNotReg matches text that must not be accepted as a winning-bidder
  // name: procurement vocabulary, rankings, amounts with units, punctuation, and
  // values that are purely (or mostly) ASCII letters, digits and dashes.
  //
  // The question mark in the punctuation group is escaped: a bare `?` directly
  // after `|` has nothing to repeat, which Go's regexp parser rejects, so
  // MustCompile would panic during package initialisation.
  // NOTE(review): the duplicated `:` and `;` alternatives suggest that full-width
  // punctuation was normalised to ASCII somewhere on the way — confirm against
  // the upstream file.
  var winnerorderNotReg = regexp.MustCompile(`(附件|否决原因|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\d[\s]{0,10}(\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|\?|¥|\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})`)
  90. //处理分包信息
  91. func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
  92. qu.Try(func() {
  93. if len(j.BlockPackage) > 0 {
  94. for _, ev := range e.PkgRuleCores {
  95. for _, eve := range ev.RuleCores {
  96. if !eve.IsLua {
  97. ExtRuleCoreByPkgReg(j, eve, e) // 分包正则抽取 预算 中标单位 中标价 成交状态
  98. }
  99. }
  100. for _, evb := range ev.RuleBacks {
  101. if !evb.IsLua {
  102. ExtRegBackPkg(j, evb) // 分包正则清理 中标单位 成交状态 内容 名称
  103. }
  104. }
  105. }
  106. ordertmp := map[int][]map[string]interface{}{}
  107. ordertmpint := []int{}//中标排序人一共几组
  108. //中标候选人
  109. for _, v := range j.Winnerorder {
  110. if vtype, ok := v["type"].(int); ok {
  111. if ordertmp[vtype] == nil {
  112. ordertmpint = append(ordertmpint, vtype)
  113. tmp := make([]map[string]interface{}, 0)
  114. tmp = append(tmp, v)
  115. ordertmp[vtype] = tmp
  116. } else {
  117. ordertmp[vtype] = append(ordertmp[vtype], v)
  118. }
  119. }
  120. }
  121. tmpkeys := []string{}
  122. for k, _ := range j.BlockPackage {
  123. if k == "" {
  124. continue
  125. }
  126. tmpkeys = append(tmpkeys, k)
  127. }
  128. sort.Strings(tmpkeys)
  129. packageResult := map[string]map[string]interface{}{}
  130. //packagenum := len(j.BlockPackage)
  131. tmpindex :=-1
  132. for _, pkName := range tmpkeys {
  133. tmpindex++
  134. pkg, ok := j.BlockPackage[pkName]
  135. if !ok {
  136. continue
  137. }
  138. //是否清理标记
  139. clearmap := map[string]bool{}
  140. sonJobResult := map[string]interface{}{}
  141. if pkg != nil {
  142. sonJobResult["origin"] = pkg.Origin
  143. sonJobResult["text"] = pkg.Text
  144. sonJobResult["name"] = pkg.Name
  145. if pkg.WinnerPerson != "" {
  146. sonJobResult["winnertel"] = pkg.WinnerTel
  147. sonJobResult["winnerperson"] = pkg.WinnerPerson
  148. }
  149. if pkg.IsTrueBudget{
  150. sonJobResult["budget"] = pkg.Budget
  151. }
  152. if pkg.IsTrueBidamount {
  153. sonJobResult["bidamount"] = pkg.Bidamount
  154. }
  155. if pkg.Winner == "" && len(j.Winnerorder) > 0 {
  156. if sonJobResult["winnerorder"] == nil && len(tmpkeys) == len(ordertmpint) {//分包和中标候选人长度一样
  157. //ordertmp[ordertmpint[tmpindex]] 取中标候选人组
  158. sonJobResult["winnerorder"] = ordertmp[ordertmpint[tmpindex]]
  159. if sonJobResult["bidamount"] == nil || sonJobResult["bidamount"].(float64) <= 0 {
  160. if j.Winnerorder[0]["price"] != nil {
  161. moneys := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"],""})
  162. if len(moneys) > 0 &&moneys[len(moneys)-1].(bool){
  163. if vf, ok := moneys[0].(float64); ok {
  164. sonJobResult["bidamount"] = vf
  165. }
  166. }
  167. }
  168. }
  169. if sonJobResult["winner"] == "" {
  170. sonJobResult["winner"] = j.Winnerorder[0]["entname"]
  171. }
  172. }
  173. } else {
  174. if len(j.Winnerorder) > 0 {
  175. if j.Winnerorder[0]["price"] != nil {
  176. sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
  177. }
  178. sonJobResult["winner"] = j.Winnerorder[0]["entname"]
  179. }
  180. if len(pkg.WinnerOrder) > 0 {
  181. sonJobResult["winnerorder"] = pkg.WinnerOrder
  182. sonJobResult["winner"] = pkg.WinnerOrder[0]["entname"]
  183. }
  184. if sonJobResult["winner"] == nil || sonJobResult["winner"] == "" {
  185. sonJobResult["winner"] = pkg.Winner
  186. }
  187. }
  188. pkvdata(pkg, &sonJobResult, e, isSite, codeSite)
  189. sonJobResult["type"] = pkg.Type
  190. if len(tmpkeys) == 1 {
  191. if qu.Float64All(sonJobResult["budget"]) == 0 && pkg.IsTrueBudget {
  192. for _, bv := range j.Block {
  193. kvparse(bv.ColonKV, e, &sonJobResult, isSite, codeSite)
  194. kvparse(bv.TableKV, e, &sonJobResult, isSite, codeSite)
  195. kvparse(bv.SpaceKV, e, &sonJobResult, isSite, codeSite)
  196. }
  197. }
  198. }
  199. if sonJobResult["name"] == nil || sonJobResult["name"] == "" {
  200. sonJobResult["name"] = j.Title
  201. }
  202. }
  203. //分包暂不参与选举
  204. /*
  205. for k, tags := range e.Tag {
  206. L:
  207. for _, tag := range tags {
  208. if pkg.TableKV != nil {
  209. for key, val := range pkg.TableKV.Kv {
  210. if tag.Key == key {
  211. clearmap[k] = false
  212. var tmpval interface{}
  213. if len(e.ClearFn[k]) > 0 {
  214. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  215. tmpval = data[0]
  216. } else {
  217. tmpval = val
  218. }
  219. sonJobResult[k] = tmpval
  220. if packagenum == 1 {
  221. field := &ju.ExtField{
  222. Field: k,
  223. Code: "package",
  224. RuleText: "package",
  225. Type: "table",
  226. MatchType: "tag_string",
  227. ExtFrom: "package",
  228. Value: tmpval,
  229. Score: 0,
  230. }
  231. j.Result[k] = append(j.Result[k], field)
  232. }
  233. break L
  234. }
  235. }
  236. }
  237. if pkg.ColonKV != nil {
  238. for key, val := range pkg.ColonKV.Kv {
  239. if tag.Key == key {
  240. clearmap[k] = true
  241. var tmpval interface{}
  242. if len(e.ClearFn[k]) > 0 {
  243. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  244. tmpval = data[0]
  245. } else {
  246. tmpval = val
  247. }
  248. sonJobResult[k] = tmpval
  249. if packagenum == 1 {
  250. field := &ju.ExtField{
  251. Field: k,
  252. Code: "package",
  253. RuleText: "package",
  254. Type: "colon",
  255. MatchType: "tag_string",
  256. ExtFrom: "package",
  257. Value: tmpval,
  258. Score: 0,
  259. }
  260. j.Result[k] = append(j.Result[k], field)
  261. }
  262. break L
  263. }
  264. }
  265. }
  266. if pkg.SpaceKV != nil {
  267. for key, val := range pkg.SpaceKV.Kv {
  268. if tag.Key == key {
  269. clearmap[k] = true
  270. var tmpval interface{}
  271. if len(e.ClearFn[k]) > 0 {
  272. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  273. tmpval = data[0]
  274. } else {
  275. tmpval = val
  276. }
  277. sonJobResult[k] = tmpval
  278. if packagenum == 1 {
  279. field := &ju.ExtField{
  280. Field: k,
  281. Code: "package",
  282. RuleText: "package",
  283. Type: "space",
  284. MatchType: "tag_string",
  285. ExtFrom: "package",
  286. Value: tmpval,
  287. Score: 0,
  288. }
  289. j.Result[k] = append(j.Result[k], field)
  290. }
  291. break L
  292. }
  293. }
  294. }
  295. }
  296. }
  297. */
  298. //如果有中标候选人排序,优先用第一中标候选人的中标单位和中标金额覆盖该包里面相应的字段的值
  299. if pkg.WinnerOrder != nil && len(pkg.WinnerOrder) > 0 {
  300. firstWinnerOrder := pkg.WinnerOrder[0]
  301. if qu.ObjToString(sonJobResult["winner"]) == "" || (!pkg.Accuracy && qu.ObjToString(firstWinnerOrder["entname"]) != "" && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
  302. sonJobResult["winner"] = firstWinnerOrder["entname"]
  303. }
  304. if (qu.Float64All(sonJobResult["bidamount"]) == 0 && pkg.IsTrueBidamount) || (!pkg.Accuracy && qu.Float64All(firstWinnerOrder["price"]) > 0 && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
  305. if firstWinnerOrder["price"] != nil {
  306. sonJobResult["bidamount"] = firstWinnerOrder["price"]
  307. }
  308. }
  309. }
  310. //log.Println(pkName, sonJobResult)
  311. sonJobResult["clear"] = clearmap
  312. packageResult[pkName] = sonJobResult
  313. }
  314. if len(packageResult) > 0 {
  315. j.PackageInfo = packageResult
  316. if len(j.Result["winner"]) == 0 {
  317. tmpsss := []*ju.ExtField{}
  318. for k, v := range packageResult {
  319. tmpsss = append(tmpsss, &ju.ExtField{Field: "winner", ExtFrom: "j.PackageInfo." + k + ".winner", Value: v["winner"], Weight: -999})
  320. }
  321. j.Result["winner"] = tmpsss
  322. }
  323. }
  324. }
  325. //extRegBackPack(j, e)
  326. }, func(err interface{}) {
  327. log.Println("PackageDetail err", err)
  328. })
  329. }
  330. //清理分包信息
  331. func extRegBackPack(j *ju.Job, e *ExtractTask) {
  332. defer qu.Catch()
  333. //正则清理
  334. if j.CategorySecond == "" {
  335. for _, rc1 := range e.RuleCores[j.Category] {
  336. for _, rc := range rc1 {
  337. for pk, pack := range j.PackageInfo {
  338. clear, _ := pack["clear"].(map[string]interface{})
  339. for k, val := range pack {
  340. if b, ok := clear[k].(bool); ok && b {
  341. if rc.Field == k {
  342. text := qu.ObjToString(val)
  343. for _, in := range rc.RuleBacks {
  344. if text != "" && !in.IsLua {
  345. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  346. }
  347. }
  348. pack[k] = text
  349. }
  350. }
  351. }
  352. j.PackageInfo[pk] = pack
  353. }
  354. }
  355. }
  356. } else {
  357. for _, rc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
  358. for _, rc := range rc1 {
  359. for pk, pack := range j.PackageInfo {
  360. clear, _ := pack["clear"].(map[string]interface{})
  361. for k, val := range pack {
  362. if b, ok := clear[k].(bool); ok && b {
  363. if rc.Field == k {
  364. text := qu.ObjToString(val)
  365. for _, in := range rc.RuleBacks {
  366. if text != "" && !in.IsLua {
  367. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  368. }
  369. }
  370. pack[k] = text
  371. }
  372. }
  373. }
  374. j.PackageInfo[pk] = pack
  375. }
  376. }
  377. }
  378. }
  379. //通用正则清理
  380. for _, in := range e.RuleBacks {
  381. for _, pack := range j.PackageInfo {
  382. for k, val := range pack {
  383. if in.Field == k {
  384. text := qu.ObjToString(val)
  385. if text != "" && !in.IsLua {
  386. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  387. }
  388. pack[k] = text
  389. }
  390. }
  391. }
  392. }
  393. //函数清理
  394. for _, pack := range j.PackageInfo {
  395. for key, val := range pack {
  396. if reflect.TypeOf(val) != nil && (reflect.TypeOf(val).String() == "float64" || reflect.TypeOf(val).String() == "int64") {
  397. continue
  398. } else {
  399. lock.Lock()
  400. cfn := e.ClearFn[key]
  401. lock.Unlock()
  402. data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
  403. pack[key] = data[0]
  404. }
  405. }
  406. }
  407. //特殊属性的分包清理
  408. for _, rc := range e.PkgRuleCores {
  409. for pk, pack := range j.PackageInfo {
  410. for k, val := range pack {
  411. if rc.Field == k {
  412. text := qu.ObjToString(val)
  413. for _, in := range rc.RuleBacks {
  414. if text != "" {
  415. if !in.IsLua { //正则
  416. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  417. } else { //lua
  418. result := GetResultMapForLua(j)
  419. lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText}
  420. if j != nil {
  421. lua.Block = j.Block
  422. }
  423. extinfo := lua.RunScript("back")
  424. if extinfo["value"] != nil {
  425. text = qu.ObjToString(extinfo["value"])
  426. }
  427. }
  428. }
  429. }
  430. pack[k] = text
  431. }
  432. }
  433. j.PackageInfo[pk] = pack
  434. }
  435. }
  436. }