extpackage.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. // extpackage
  2. package extract
  3. import (
  4. "jy/clear"
  5. ju "jy/util"
  6. "log"
  7. qu "qfw/util"
  8. "reflect"
  9. "regexp"
  10. "sort"
  11. )
  12. func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask, isSite bool, codeSite string) {
  13. if pkg.ColonKV != nil {
  14. kvparse(pkg.ColonKV, e, sonJobResult, isSite, codeSite)
  15. }
  16. if pkg.TableKV != nil {
  17. kvparse(pkg.TableKV, e, sonJobResult, isSite, codeSite)
  18. }
  19. if pkg.SpaceKV != nil {
  20. kvparse(pkg.SpaceKV, e, sonJobResult, isSite, codeSite)
  21. }
  22. }
  23. func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{}, isSite bool, codeSite string) {
  24. if p != nil {
  25. for pk, pv2 := range p.KvTags {
  26. if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额") {
  27. tmp := []*ju.Tag{}
  28. var tmpindex, tmpweight int = -9999, -9999
  29. for ii, vv := range pv2 {
  30. if pk == "中标单位" && regexp.MustCompile("[0-9.元人¥$]").MatchString(vv.Value) {
  31. continue
  32. }
  33. if tmpweight < vv.Weight {
  34. tmpindex = ii
  35. tmpweight = vv.Weight
  36. }
  37. }
  38. if tmpindex == -9999 {
  39. continue
  40. }
  41. tmp = append(tmp, pv2[tmpindex])
  42. p.KvTags[pk] = tmp
  43. }
  44. }
  45. for pk, pv := range p.KvTags {
  46. if len(pv) == 0 {
  47. continue
  48. }
  49. tags := ju.GetTags(pk, isSite, codeSite)
  50. if tags.Len() > 0 {
  51. if ((*sonJobResult)["name"] == nil || (*sonJobResult)["name"] == "") && tags[0].Key == "项目名称" {
  52. (*sonJobResult)["name"] = pv[0].Value
  53. }
  54. if qu.Float64All((*sonJobResult)["budget"]) == 0 && tags[0].Key == "预算" {
  55. lock.Lock()
  56. cfn := e.ClearFn["budget"]
  57. lock.Unlock()
  58. data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
  59. if data[0] == 0 {
  60. if istrue, ok := data[len(data)-1].(bool); istrue && ok {
  61. (*sonJobResult)["budget"] = data[0]
  62. }
  63. }
  64. continue
  65. }
  66. if qu.Float64All((*sonJobResult)["bidamount"]) == 0 && tags[0].Key == "中标金额" {
  67. lock.Lock()
  68. cfn := e.ClearFn["bidamount"]
  69. lock.Unlock()
  70. data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
  71. if istrue, ok := data[len(data)-1].(bool); istrue && ok {
  72. (*sonJobResult)["bidamount"] = data[0]
  73. }
  74. continue
  75. }
  76. if ((*sonJobResult)["winner"] == nil || (*sonJobResult)["winner"] == "") && tags[0].Key == "中标单位" {
  77. if winnerorderNotReg.MatchString(pv[0].Value) {
  78. continue
  79. }
  80. (*sonJobResult)["winner"] = pv[0].Value
  81. }
  82. }
  83. if (*sonJobResult)["name"] == nil && pk == "名称" {
  84. (*sonJobResult)["name"] = pv[0].Value
  85. }
  86. }
  87. }
  88. }
  // winnerorderNotReg matches text that must not be accepted as a winning
  // bidder name: tender vocabulary, amounts and units, rankings, punctuation
  // and identifier-like ASCII runs.
  //
  // The punctuation group previously contained a bare `?` directly after `|`,
  // which Go's regexp package rejects ("missing argument to repetition
  // operator"), so MustCompile panicked during package initialisation. The
  // question mark is now escaped.
  // NOTE(review): the group listed `:` and `;` twice, which suggests the
  // full-width forms were lost; the full-width colon, semicolon, question mark
  // and yen sign are therefore matched as well — confirm against the intended
  // pattern.
  var winnerorderNotReg = regexp.MustCompile(`(附件|否决原因|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\d[\s]{0,10}(\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|：|;|；|\?|？|¥|￥|\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})`)
  90. //处理分包信息
  91. func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
  92. qu.Try(func() {
  93. if len(j.BlockPackage) > 0 {
  94. for _, ev := range e.PkgRuleCores {
  95. for _, eve := range ev.RuleCores {
  96. if !eve.IsLua {
  97. ExtRuleCoreByPkgReg(j, eve, e) // 分包正则抽取 预算 中标单位 中标价 成交状态
  98. }
  99. }
  100. for _, evb := range ev.RuleBacks {
  101. if !evb.IsLua {
  102. ExtRegBackPkg(j, evb) // 分包正则清理 中标单位 成交状态 内容 名称
  103. }
  104. }
  105. }
  106. ordertmp := map[int][]map[string]interface{}{}
  107. ordertmpint := []int{}//中标排序人一共几组
  108. //中标候选人
  109. for _, v := range j.Winnerorder {
  110. if vtype, ok := v["type"].(int); ok {
  111. if ordertmp[vtype] == nil {
  112. ordertmpint = append(ordertmpint, vtype)
  113. tmp := make([]map[string]interface{}, 0)
  114. tmp = append(tmp, v)
  115. ordertmp[vtype] = tmp
  116. } else {
  117. ordertmp[vtype] = append(ordertmp[vtype], v)
  118. }
  119. }
  120. }
  121. tmpkeys := []string{}
  122. for k, _ := range j.BlockPackage {
  123. if k == "" {
  124. continue
  125. }
  126. tmpkeys = append(tmpkeys, k)
  127. }
  128. sort.Strings(tmpkeys)
  129. packageResult := map[string]map[string]interface{}{}
  130. //packagenum := len(j.BlockPackage)
  131. tmpindex :=-1
  132. for _, pkName := range tmpkeys {
  133. tmpindex++
  134. pkg, ok := j.BlockPackage[pkName]
  135. if !ok {
  136. continue
  137. }
  138. //是否清理标记
  139. clearmap := map[string]bool{}
  140. sonJobResult := map[string]interface{}{}
  141. if pkg != nil {
  142. sonJobResult["origin"] = pkg.Origin
  143. sonJobResult["text"] = pkg.Text
  144. sonJobResult["name"] = pkg.Name
  145. if pkg.WinnerPerson != "" {
  146. sonJobResult["winnertel"] = pkg.WinnerTel
  147. sonJobResult["winnerperson"] = pkg.WinnerPerson
  148. }
  149. if pkg.IsTrueBudget{
  150. sonJobResult["budget"] = pkg.Budget
  151. }
  152. if pkg.IsTrueBidamount {
  153. sonJobResult["bidamount"] = pkg.Bidamount
  154. }
  155. if pkg.Winner == "" && len(j.Winnerorder) > 0 {
  156. if sonJobResult["winnerorder"] == nil && len(tmpkeys) == len(ordertmpint) {//分包和中标候选人长度一样
  157. //ordertmp[ordertmpint[tmpindex]] 取中标候选人组
  158. sonJobResult["winnerorder"] = ordertmp[ordertmpint[tmpindex]]
  159. if sonJobResult["bidamount"] == nil || sonJobResult["bidamount"].(float64) <= 0 {
  160. if j.Winnerorder[0]["price"] != nil {
  161. moneys := clear.ObjToMoney([]interface{}{j.Winnerorder[0]["price"],""})
  162. if len(moneys) > 0 &&moneys[len(moneys)-1].(bool){
  163. if vf, ok := moneys[0].(float64); ok {
  164. sonJobResult["bidamount"] = vf
  165. }
  166. }
  167. }
  168. }
  169. if sonJobResult["winner"] == "" {
  170. sonJobResult["winner"] = j.Winnerorder[0]["entname"]
  171. }
  172. }
  173. } else {
  174. if len(j.Winnerorder) > 0 {
  175. if j.Winnerorder[0]["price"] != nil {
  176. sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
  177. }
  178. sonJobResult["winner"] = j.Winnerorder[0]["entname"]
  179. }
  180. if len(pkg.WinnerOrder) > 0 {
  181. sonJobResult["winnerorder"] = pkg.WinnerOrder
  182. sonJobResult["winner"] = pkg.WinnerOrder[0]["entname"]
  183. }
  184. if sonJobResult["winner"] == nil || sonJobResult["winner"] == "" {
  185. sonJobResult["winner"] = pkg.Winner
  186. }
  187. }
  188. pkvdata(pkg, &sonJobResult, e, isSite, codeSite)
  189. sonJobResult["type"] = pkg.Type
  190. if len(tmpkeys) == 1 {
  191. if qu.Float64All(sonJobResult["budget"]) == 0 && pkg.IsTrueBudget {
  192. for _, bv := range j.Block {
  193. kvparse(bv.ColonKV, e, &sonJobResult, isSite, codeSite)
  194. kvparse(bv.TableKV, e, &sonJobResult, isSite, codeSite)
  195. kvparse(bv.SpaceKV, e, &sonJobResult, isSite, codeSite)
  196. }
  197. }
  198. }
  199. if sonJobResult["name"] == nil || sonJobResult["name"] == "" {
  200. sonJobResult["name"] = j.Title
  201. }
  202. }
  203. //分包暂不参与选举
  204. /*
  205. for k, tags := range e.Tag {
  206. L:
  207. for _, tag := range tags {
  208. if pkg.TableKV != nil {
  209. for key, val := range pkg.TableKV.Kv {
  210. if tag.Key == key {
  211. clearmap[k] = false
  212. var tmpval interface{}
  213. if len(e.ClearFn[k]) > 0 {
  214. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  215. tmpval = data[0]
  216. } else {
  217. tmpval = val
  218. }
  219. sonJobResult[k] = tmpval
  220. if packagenum == 1 {
  221. field := &ju.ExtField{
  222. Field: k,
  223. Code: "package",
  224. RuleText: "package",
  225. Type: "table",
  226. MatchType: "tag_string",
  227. ExtFrom: "package",
  228. Value: tmpval,
  229. Score: 0,
  230. }
  231. j.Result[k] = append(j.Result[k], field)
  232. }
  233. break L
  234. }
  235. }
  236. }
  237. if pkg.ColonKV != nil {
  238. for key, val := range pkg.ColonKV.Kv {
  239. if tag.Key == key {
  240. clearmap[k] = true
  241. var tmpval interface{}
  242. if len(e.ClearFn[k]) > 0 {
  243. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  244. tmpval = data[0]
  245. } else {
  246. tmpval = val
  247. }
  248. sonJobResult[k] = tmpval
  249. if packagenum == 1 {
  250. field := &ju.ExtField{
  251. Field: k,
  252. Code: "package",
  253. RuleText: "package",
  254. Type: "colon",
  255. MatchType: "tag_string",
  256. ExtFrom: "package",
  257. Value: tmpval,
  258. Score: 0,
  259. }
  260. j.Result[k] = append(j.Result[k], field)
  261. }
  262. break L
  263. }
  264. }
  265. }
  266. if pkg.SpaceKV != nil {
  267. for key, val := range pkg.SpaceKV.Kv {
  268. if tag.Key == key {
  269. clearmap[k] = true
  270. var tmpval interface{}
  271. if len(e.ClearFn[k]) > 0 {
  272. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  273. tmpval = data[0]
  274. } else {
  275. tmpval = val
  276. }
  277. sonJobResult[k] = tmpval
  278. if packagenum == 1 {
  279. field := &ju.ExtField{
  280. Field: k,
  281. Code: "package",
  282. RuleText: "package",
  283. Type: "space",
  284. MatchType: "tag_string",
  285. ExtFrom: "package",
  286. Value: tmpval,
  287. Score: 0,
  288. }
  289. j.Result[k] = append(j.Result[k], field)
  290. }
  291. break L
  292. }
  293. }
  294. }
  295. }
  296. }
  297. */
  298. //如果有中标候选人排序,优先用第一中标候选人的中标单位和中标金额覆盖该包里面相应的字段的值
  299. if pkg.WinnerOrder != nil && len(pkg.WinnerOrder) > 0 {
  300. firstWinnerOrder := pkg.WinnerOrder[0]
  301. if qu.ObjToString(sonJobResult["winner"]) == "" || (!pkg.Accuracy && qu.ObjToString(firstWinnerOrder["entname"]) != "" && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
  302. sonJobResult["winner"] = firstWinnerOrder["entname"]
  303. }
  304. if (qu.Float64All(sonJobResult["bidamount"]) == 0 && pkg.IsTrueBidamount) || (!pkg.Accuracy && qu.Float64All(firstWinnerOrder["price"]) > 0 && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
  305. if firstWinnerOrder["price"] != nil {
  306. sonJobResult["bidamount"] = firstWinnerOrder["price"]
  307. }
  308. }
  309. }
  310. //log.Println(pkName, sonJobResult)
  311. sonJobResult["clear"] = clearmap
  312. packageResult[pkName] = sonJobResult
  313. }
  314. if len(packageResult) > 0 {
  315. j.PackageInfo = packageResult
  316. if len(j.Result["winner"]) == 0 {
  317. tmpsss := []*ju.ExtField{}
  318. for k, v := range packageResult {
  319. tmpsss = append(tmpsss, &ju.ExtField{Field: "winner", ExtFrom: "j.PackageInfo." + k + ".winner", Value: v["winner"], Weight: -999})
  320. }
  321. j.Result["winner"] = tmpsss
  322. }
  323. }
  324. }
  325. //extRegBackPack(j, e)
  326. }, func(err interface{}) {
  327. log.Println("PackageDetail err", err)
  328. })
  329. }
  330. //清理分包信息
  331. func extRegBackPack(j *ju.Job, e *ExtractTask) {
  332. defer qu.Catch()
  333. //正则清理
  334. if j.CategorySecond == "" {
  335. for _, rc1 := range e.RuleCores[j.Category] {
  336. for _, rc := range rc1 {
  337. for pk, pack := range j.PackageInfo {
  338. clear, _ := pack["clear"].(map[string]interface{})
  339. for k, val := range pack {
  340. if b, ok := clear[k].(bool); ok && b {
  341. if rc.Field == k {
  342. text := qu.ObjToString(val)
  343. for _, in := range rc.RuleBacks {
  344. if text != "" && !in.IsLua {
  345. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  346. }
  347. }
  348. pack[k] = text
  349. }
  350. }
  351. }
  352. j.PackageInfo[pk] = pack
  353. }
  354. }
  355. }
  356. } else {
  357. for _, rc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
  358. for _, rc := range rc1 {
  359. for pk, pack := range j.PackageInfo {
  360. clear, _ := pack["clear"].(map[string]interface{})
  361. for k, val := range pack {
  362. if b, ok := clear[k].(bool); ok && b {
  363. if rc.Field == k {
  364. text := qu.ObjToString(val)
  365. for _, in := range rc.RuleBacks {
  366. if text != "" && !in.IsLua {
  367. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  368. }
  369. }
  370. pack[k] = text
  371. }
  372. }
  373. }
  374. j.PackageInfo[pk] = pack
  375. }
  376. }
  377. }
  378. }
  379. //通用正则清理
  380. for _, in := range e.RuleBacks {
  381. for _, pack := range j.PackageInfo {
  382. for k, val := range pack {
  383. if in.Field == k {
  384. text := qu.ObjToString(val)
  385. if text != "" && !in.IsLua {
  386. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  387. }
  388. pack[k] = text
  389. }
  390. }
  391. }
  392. }
  393. //函数清理
  394. for _, pack := range j.PackageInfo {
  395. for key, val := range pack {
  396. if reflect.TypeOf(val) != nil && (reflect.TypeOf(val).String() == "float64" || reflect.TypeOf(val).String() == "int64") {
  397. continue
  398. } else {
  399. lock.Lock()
  400. cfn := e.ClearFn[key]
  401. lock.Unlock()
  402. data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
  403. pack[key] = data[0]
  404. }
  405. }
  406. }
  407. //特殊属性的分包清理
  408. for _, rc := range e.PkgRuleCores {
  409. for pk, pack := range j.PackageInfo {
  410. for k, val := range pack {
  411. if rc.Field == k {
  412. text := qu.ObjToString(val)
  413. for _, in := range rc.RuleBacks {
  414. if text != "" {
  415. if !in.IsLua { //正则
  416. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  417. } else { //lua
  418. result := GetResultMapForLua(j)
  419. lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText}
  420. if j != nil {
  421. lua.Block = j.Block
  422. }
  423. extinfo := lua.RunScript("back")
  424. if extinfo["value"] != nil {
  425. text = qu.ObjToString(extinfo["value"])
  426. }
  427. }
  428. }
  429. }
  430. pack[k] = text
  431. }
  432. }
  433. j.PackageInfo[pk] = pack
  434. }
  435. }
  436. }