rule.yaml 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. 结构化字段:
  2. 预告:
  3. - type: exists
  4. value:
  5. - buyer
  6. - budget
  7. - projectname
  8. - area
  9. - city
  10. - projectcode
  11. - procurementlist
  12. score: -10
  13. 招标:
  14. - type: exists
  15. value:
  16. - buyer
  17. - budget
  18. - projectname
  19. - area
  20. - city
  21. - projectcode
  22. score: -5
  23. - type: exists
  24. value:
  25. - bidopentime
  26. - bidendtime
  27. - buyertel
  28. - agency
  29. score: -3
  30. 结果:
  31. - type: exists
  32. value:
  33. - buyer
  34. - projectname
  35. - projectcode
  36. - winner
  37. - bidamount
  38. - area
  39. - city
  40. score: -5
  41. - type: exists
  42. value:
  43. - winnertel
  44. - projectperiod
  45. - purchasinglist
  46. score: -3
  47. 其他:
  48. - type: exists
  49. value:
  50. - buyer
  51. - budget
  52. - projectname
  53. - projectcode
  54. - winner
  55. - bidamount
  56. - area
  57. - city
  58. score: -5
  59. 采集字段:
  60. title:
  61. - type: length
  62. min: 0
  63. max: 5
  64. score: -5
  65. - type: length
  66. min: 50
  67. score: -5
  68. - type: regexp
  69. notReg: "[\\p{Han}]"
  70. score: -5
  71. detail:
  72. - type: length
  73. min: 0
  74. max: 50
  75. score: -10
  76. - type: length
  77. min: 50
  78. max: 200
  79. score: -5
  80. isValidFile:
  81. - type: equal
  82. value: false
  83. score: -5
  84. 站点画像:
  85. 政府采购网站:
  86. topType: 政府网站
  87. yes:
  88. 政府采购网: 10
  89. 公共资源交易中心: 10
  90. 默认: 5
  91. not:
  92. score: -3
  93. 需登录网站:
  94. score: -5
  95. 竞品网站:
  96. score: -40
  97. list:
  98. - 千里马招标网
  99. - 中国招标与采购网
  100. - 元博网
  101. 延时采集网站:
  102. score: -10
  103. 对比大模型:
  104. allExistsAndSame:
  105. fields:
  106. - area:s_area
  107. - city:s_city
  108. - buyer:s_buyer
  109. - budget:s_budget
  110. - winner:s_winner
  111. - bidamount:s_bidamount
  112. score: -5
  113. partExistsAndSame:
  114. fields:
  115. - projectname:s_projectname
  116. - projectcode:s_projectcode
  117. - winnertel:s_winnertel
  118. - agency:s_agency
  119. score: -3