extpackage.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. // extpackage
  2. package extract
  3. import (
  4. "jy/clear"
  5. ju "jy/util"
  6. "log"
  7. qu "qfw/util"
  8. "reflect"
  9. regexp "regexp"
  10. "sort"
  11. )
  12. func pkvdata(pkg *ju.BlockPackage, sonJobResult *map[string]interface{}, e *ExtractTask, isSite bool, codeSite string) {
  13. if pkg.ColonKV != nil {
  14. kvparse(pkg.ColonKV, e, sonJobResult, isSite, codeSite)
  15. }
  16. if pkg.TableKV != nil {
  17. kvparse(pkg.TableKV, e, sonJobResult, isSite, codeSite)
  18. }
  19. if pkg.SpaceKV != nil {
  20. kvparse(pkg.SpaceKV, e, sonJobResult, isSite, codeSite)
  21. }
  22. }
  23. func kvparse(p *ju.JobKv, e *ExtractTask, sonJobResult *map[string]interface{}, isSite bool, codeSite string) {
  24. if p != nil {
  25. for pk, pv2 := range p.KvTags {
  26. if len(pv2) > 1 && !(pk == "预算" || pk == "中标金额") {
  27. tmp := []*ju.Tag{}
  28. var tmpindex, tmpweight int = -9999, -9999
  29. for ii, vv := range pv2 {
  30. if pk == "中标单位" && regexp.MustCompile("[0-9.元人¥$]").MatchString(vv.Value) {
  31. continue
  32. }
  33. if tmpweight < vv.Weight {
  34. tmpindex = ii
  35. tmpweight = vv.Weight
  36. }
  37. }
  38. if tmpindex == -9999 {
  39. continue
  40. }
  41. tmp = append(tmp, pv2[tmpindex])
  42. p.KvTags[pk] = tmp
  43. }
  44. }
  45. for pk, pv := range p.KvTags {
  46. if len(pv) == 0 {
  47. continue
  48. }
  49. tags := ju.GetTags(pk, isSite, codeSite)
  50. if tags.Len() > 0 {
  51. if ((*sonJobResult)["name"] == nil || (*sonJobResult)["name"] == "") && tags[0].Key == "项目名称" {
  52. (*sonJobResult)["name"] = pv[0].Value
  53. }
  54. if qu.Float64All((*sonJobResult)["budget"]) == 0 && tags[0].Key == "预算" {
  55. lock.Lock()
  56. cfn := e.ClearFn["budget"]
  57. lock.Unlock()
  58. data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
  59. if data[0] ==0{
  60. if istrue,ok:= data[len(data)-1].(bool);istrue&&ok{
  61. (*sonJobResult)["budget"] = data[0]
  62. }else {
  63. continue
  64. }
  65. }
  66. (*sonJobResult)["budget"] = data[0]
  67. continue
  68. }
  69. if qu.Float64All((*sonJobResult)["bidamount"]) == 0 && tags[0].Key == "中标金额" {
  70. lock.Lock()
  71. cfn := e.ClearFn["budget"]
  72. lock.Unlock()
  73. data := clear.DoClearFn(cfn, []interface{}{pv[0].Value, ""})
  74. if istrue,ok:= data[len(data)-1].(bool);istrue&&ok{
  75. (*sonJobResult)["budget"] = data[0]
  76. }else {
  77. continue
  78. }
  79. (*sonJobResult)["bidamount"] = data[0]
  80. continue
  81. }
  82. if ((*sonJobResult)["winner"] == nil || (*sonJobResult)["winner"] == "") && tags[0].Key == "中标单位" {
  83. if winnerorderNotReg.MatchString(pv[0].Value){
  84. continue
  85. }
  86. (*sonJobResult)["winner"] = pv[0].Value
  87. }
  88. }
  89. if (*sonJobResult)["name"] == nil && pk == "名称" {
  90. (*sonJobResult)["name"] = pv[0].Value
  91. }
  92. }
  93. }
  94. }
  95. var winnerorderNotReg =regexp.MustCompile(`(附件|否决原因|候选|招标失败|注册表|交易中心|序号内容|不足|公告|变更|采购|招标|废标|废止|流标|中标|投标|评标|开标|供应商|金额|万元|元整|预算|报价|单价|第(\d|一|二|三|四|五)(名|包)|排名|候选|确定|标段|(标|一|二|三|四|五)包|中选|成交|包号|(A|B|C|D|E|F|G)包|地址|详情|要求|推荐|名称|评审|得分|合同|平方米|公示期|结果|备注|说明|单位|代表|委托|工作日|营业(执|期)|通过|代码|电话|联系|条件|合理|费率|以上|以下|拟定|为|注:|\d[\s]{0,10}(\.|元|包|米|平米|平方米|吨|辆|千克|克|毫克|毫升|公升|套|件|瓶|箱|只|台|年|月|日|天|号)|(:|:|;|;|?|¥|\*|%)|^[a-zA-Z0-9-]{5,100}|^[a-zA-Z0-9-]{1,100}$|[a-zA-Z0-9-]{10,100})`)
  96. //处理分包信息
  97. func PackageDetail(j *ju.Job, e *ExtractTask, isSite bool, codeSite string) {
  98. qu.Try(func() {
  99. if len(j.BlockPackage) > 0 {
  100. for _, ev := range e.PkgRuleCores {
  101. for _, eve := range ev.RuleCores {
  102. if !eve.IsLua {
  103. ExtRuleCoreByPkgReg(j, eve, e) // 分包正则抽取 预算 中标单位 中标价 成交状态
  104. }
  105. }
  106. for _, evb := range ev.RuleBacks {
  107. if !evb.IsLua {
  108. ExtRegBackPkg(j, evb) // 分包正则清理 中标单位 成交状态 内容 名称
  109. }
  110. }
  111. }
  112. tmpkeys := []string{}
  113. for k, _ := range j.BlockPackage {
  114. if k == "" {
  115. continue
  116. }
  117. tmpkeys = append(tmpkeys, k)
  118. }
  119. sort.Strings(tmpkeys)
  120. packageResult := map[string]map[string]interface{}{}
  121. //packagenum := len(j.BlockPackage)
  122. for _, pkName := range tmpkeys {
  123. pkg, ok := j.BlockPackage[pkName]
  124. if !ok {
  125. continue
  126. }
  127. //是否清理标记
  128. clearmap := map[string]bool{}
  129. sonJobResult := map[string]interface{}{}
  130. if pkg != nil {
  131. sonJobResult["origin"] = pkg.Origin
  132. sonJobResult["text"] = pkg.Text
  133. sonJobResult["budget"] = pkg.Budget
  134. sonJobResult["bidamount"] = pkg.Bidamount
  135. if pkg.Winner == "" && len(j.Winnerorder) > 0 {
  136. if sonJobResult["winnerorder"] == nil {
  137. sonJobResult["winnerorder"] = j.Winnerorder
  138. if sonJobResult["bidamount"].(float64) <= 0 {
  139. sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
  140. }
  141. if sonJobResult["winner"] == "" {
  142. sonJobResult["winner"] = j.Winnerorder[0]["entname"]
  143. }
  144. }
  145. } else {
  146. if len(j.Winnerorder) > 0 {
  147. sonJobResult["bidamount"] = qu.Float64All(j.Winnerorder[0]["price"])
  148. sonJobResult["winner"] = j.Winnerorder[0]["entname"]
  149. }
  150. if len(pkg.WinnerOrder) > 0 {
  151. sonJobResult["winnerorder"] = pkg.WinnerOrder
  152. sonJobResult["winner"] = pkg.WinnerOrder[0]["entname"]
  153. }
  154. if sonJobResult["winner"] == nil || sonJobResult["winner"] == "" {
  155. sonJobResult["winner"] = pkg.Winner
  156. }
  157. }
  158. pkvdata(pkg, &sonJobResult, e, isSite, codeSite)
  159. sonJobResult["type"] = pkg.Type
  160. if len(tmpkeys) == 1 {
  161. if qu.Float64All(sonJobResult["budget"]) == 0 {
  162. for _, bv := range j.Block {
  163. kvparse(bv.ColonKV, e, &sonJobResult, isSite, codeSite)
  164. kvparse(bv.TableKV, e, &sonJobResult, isSite, codeSite)
  165. kvparse(bv.SpaceKV, e, &sonJobResult, isSite, codeSite)
  166. }
  167. }
  168. }
  169. if sonJobResult["name"] == nil {
  170. sonJobResult["name"] = j.Title
  171. }
  172. }
  173. //分包暂不参与选举
  174. /*
  175. for k, tags := range e.Tag {
  176. L:
  177. for _, tag := range tags {
  178. if pkg.TableKV != nil {
  179. for key, val := range pkg.TableKV.Kv {
  180. if tag.Key == key {
  181. clearmap[k] = false
  182. var tmpval interface{}
  183. if len(e.ClearFn[k]) > 0 {
  184. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  185. tmpval = data[0]
  186. } else {
  187. tmpval = val
  188. }
  189. sonJobResult[k] = tmpval
  190. if packagenum == 1 {
  191. field := &ju.ExtField{
  192. Field: k,
  193. Code: "package",
  194. RuleText: "package",
  195. Type: "table",
  196. MatchType: "tag_string",
  197. ExtFrom: "package",
  198. Value: tmpval,
  199. Score: 0,
  200. }
  201. j.Result[k] = append(j.Result[k], field)
  202. }
  203. break L
  204. }
  205. }
  206. }
  207. if pkg.ColonKV != nil {
  208. for key, val := range pkg.ColonKV.Kv {
  209. if tag.Key == key {
  210. clearmap[k] = true
  211. var tmpval interface{}
  212. if len(e.ClearFn[k]) > 0 {
  213. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  214. tmpval = data[0]
  215. } else {
  216. tmpval = val
  217. }
  218. sonJobResult[k] = tmpval
  219. if packagenum == 1 {
  220. field := &ju.ExtField{
  221. Field: k,
  222. Code: "package",
  223. RuleText: "package",
  224. Type: "colon",
  225. MatchType: "tag_string",
  226. ExtFrom: "package",
  227. Value: tmpval,
  228. Score: 0,
  229. }
  230. j.Result[k] = append(j.Result[k], field)
  231. }
  232. break L
  233. }
  234. }
  235. }
  236. if pkg.SpaceKV != nil {
  237. for key, val := range pkg.SpaceKV.Kv {
  238. if tag.Key == key {
  239. clearmap[k] = true
  240. var tmpval interface{}
  241. if len(e.ClearFn[k]) > 0 {
  242. data := clear.DoClearFn(e.ClearFn[k], []interface{}{val, j.Content})
  243. tmpval = data[0]
  244. } else {
  245. tmpval = val
  246. }
  247. sonJobResult[k] = tmpval
  248. if packagenum == 1 {
  249. field := &ju.ExtField{
  250. Field: k,
  251. Code: "package",
  252. RuleText: "package",
  253. Type: "space",
  254. MatchType: "tag_string",
  255. ExtFrom: "package",
  256. Value: tmpval,
  257. Score: 0,
  258. }
  259. j.Result[k] = append(j.Result[k], field)
  260. }
  261. break L
  262. }
  263. }
  264. }
  265. }
  266. }
  267. */
  268. //如果有中标候选人排序,优先用第一中标候选人的中标单位和中标金额覆盖该包里面相应的字段的值
  269. if pkg.WinnerOrder != nil && len(pkg.WinnerOrder) > 0 {
  270. firstWinnerOrder := pkg.WinnerOrder[0]
  271. if qu.ObjToString(sonJobResult["winner"]) == "" || (!pkg.Accuracy && qu.ObjToString(firstWinnerOrder["entname"]) != "" && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
  272. sonJobResult["winner"] = firstWinnerOrder["entname"]
  273. }
  274. if qu.Float64All(sonJobResult["bidamount"]) == 0 || (!pkg.Accuracy && qu.Float64All(firstWinnerOrder["price"]) > 0 && qu.Int64All(firstWinnerOrder["sort"]) == 1) {
  275. sonJobResult["bidamount"] = firstWinnerOrder["price"]
  276. }
  277. }
  278. //log.Println(pkName, sonJobResult)
  279. sonJobResult["clear"] = clearmap
  280. packageResult[pkName] = sonJobResult
  281. }
  282. if len(packageResult) > 0 {
  283. j.PackageInfo = packageResult
  284. if len(j.Result["winner"]) == 0 {
  285. tmpsss := []*ju.ExtField{}
  286. for k, v := range packageResult {
  287. tmpsss = append(tmpsss, &ju.ExtField{Field: "winner", ExtFrom: "j.PackageInfo." + k + ".winner", Value: v["winner"], Weight: -999})
  288. }
  289. j.Result["winner"] = tmpsss
  290. }
  291. }
  292. }
  293. //extRegBackPack(j, e)
  294. }, func(err interface{}) {
  295. log.Println("PackageDetail err", err)
  296. })
  297. }
  298. //清理分包信息
  299. func extRegBackPack(j *ju.Job, e *ExtractTask) {
  300. defer qu.Catch()
  301. //正则清理
  302. if j.CategorySecond == "" {
  303. for _, rc1 := range e.RuleCores[j.Category] {
  304. for _, rc := range rc1 {
  305. for pk, pack := range j.PackageInfo {
  306. clear, _ := pack["clear"].(map[string]interface{})
  307. for k, val := range pack {
  308. if b, ok := clear[k].(bool); ok && b {
  309. if rc.Field == k {
  310. text := qu.ObjToString(val)
  311. for _, in := range rc.RuleBacks {
  312. if text != "" && !in.IsLua {
  313. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  314. }
  315. }
  316. pack[k] = text
  317. }
  318. }
  319. }
  320. j.PackageInfo[pk] = pack
  321. }
  322. }
  323. }
  324. } else {
  325. for _, rc1 := range e.RuleCores[j.Category+"_"+j.CategorySecond] {
  326. for _, rc := range rc1 {
  327. for pk, pack := range j.PackageInfo {
  328. clear, _ := pack["clear"].(map[string]interface{})
  329. for k, val := range pack {
  330. if b, ok := clear[k].(bool); ok && b {
  331. if rc.Field == k {
  332. text := qu.ObjToString(val)
  333. for _, in := range rc.RuleBacks {
  334. if text != "" && !in.IsLua {
  335. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  336. }
  337. }
  338. pack[k] = text
  339. }
  340. }
  341. }
  342. j.PackageInfo[pk] = pack
  343. }
  344. }
  345. }
  346. }
  347. //通用正则清理
  348. for _, in := range e.RuleBacks {
  349. for _, pack := range j.PackageInfo {
  350. for k, val := range pack {
  351. if in.Field == k {
  352. text := qu.ObjToString(val)
  353. if text != "" && !in.IsLua {
  354. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  355. }
  356. pack[k] = text
  357. }
  358. }
  359. }
  360. }
  361. //函数清理
  362. for _, pack := range j.PackageInfo {
  363. for key, val := range pack {
  364. if reflect.TypeOf(val) != nil && (reflect.TypeOf(val).String() == "float64" || reflect.TypeOf(val).String() == "int64") {
  365. continue
  366. } else {
  367. lock.Lock()
  368. cfn := e.ClearFn[key]
  369. lock.Unlock()
  370. data := clear.DoClearFn(cfn, []interface{}{val, j.Content})
  371. pack[key] = data[0]
  372. }
  373. }
  374. }
  375. //特殊属性的分包清理
  376. for _, rc := range e.PkgRuleCores {
  377. for pk, pack := range j.PackageInfo {
  378. for k, val := range pack {
  379. if rc.Field == k {
  380. text := qu.ObjToString(val)
  381. for _, in := range rc.RuleBacks {
  382. if text != "" {
  383. if !in.IsLua { //正则
  384. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  385. } else { //lua
  386. result := GetResultMapForLua(j)
  387. lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText}
  388. if j != nil {
  389. lua.Block = j.Block
  390. }
  391. extinfo := lua.RunScript("back")
  392. if extinfo["value"] != nil {
  393. text = qu.ObjToString(extinfo["value"])
  394. }
  395. }
  396. }
  397. }
  398. pack[k] = text
  399. }
  400. }
  401. j.PackageInfo[pk] = pack
  402. }
  403. }
  404. }