main.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322
  /*
  Extraction result comparison (抽取结果对比).
  */
  4. package main
  5. import (
  6. "flag"
  7. "fmt"
  8. "jy/mongodbutil"
  9. "log"
  10. qu "qfw/util"
  11. "strings"
  12. "github.com/tealeg/xlsx"
  13. "gopkg.in/mgo.v2/bson"
  14. )
  15. var (
  16. SysConfig map[string]interface{}
  17. Premgo *mongodbutil.Pool //上个版本库
  18. Newmgo *mongodbutil.Pool //当前版本库
  19. FieldData map[string]map[string]*Data
  20. Compares map[string]*Compare
  21. Sid, Eid string
  22. Fields []string
  23. FieldsQuery string
  24. Url = "https://www.jianyu360.com/article/content/%s.html"
  25. )
  // Compare accumulates the comparison counters for a single field.
  type Compare struct {
  	// Field is the name of the compared field.
  	Field string
  	// PreExtNum and NewExtNum count records that have a value in the
  	// previous and in the current version respectively.
  	PreExtNum, NewExtNum int
  	// PreNilnum and NewNilnum count records that have no value in the
  	// previous and in the current version respectively.
  	PreNilnum, NewNilnum int
  	// EqNum and NEqNum count records whose two values are equal / not equal.
  	EqNum, NEqNum int
  }
  // Data pairs the values one record holds for a field in the two versions.
  type Data struct {
  	// Id is the record id as a string.
  	Id string
  	// PreVal and NewVal are the previous-version and current-version values
  	// rendered as strings; an empty string means no value was recorded.
  	PreVal, NewVal string
  }
  36. func init() {
  37. flag.StringVar(&Sid, "sid", "5e17deb150b5ea296ec939d3", "开始id")
  38. flag.StringVar(&Eid, "eid", "5e17e1e685a9271abf08616d", "结束id")
  39. flag.Parse()
  40. qu.ReadConfig(&SysConfig)
  41. Premgo = mongodbutil.MgoFactory(1, 3, 120, qu.ObjToString(SysConfig["premgo"]), qu.ObjToString(SysConfig["predb"]))
  42. Newmgo = mongodbutil.MgoFactory(1, 3, 120, qu.ObjToString(SysConfig["newmgo"]), qu.ObjToString(SysConfig["newdb"]))
  43. tmp, _ := SysConfig["fields"].([]interface{})
  44. for k, v := range tmp {
  45. Fields = append(Fields, qu.ObjToString(v))
  46. if k < (len(tmp) - 1) {
  47. FieldsQuery += `"` + qu.ObjToString(v) + `":1,`
  48. } else {
  49. FieldsQuery += `"` + qu.ObjToString(v) + `":1`
  50. }
  51. }
  52. FieldData = map[string]map[string]*Data{}
  53. Compares = map[string]*Compare{}
  54. }
  55. func main() {
  56. getVersionData()
  57. createXlsx()
  58. //biaozhucompare()
  59. }
  60. func createXlsx() {
  61. xf, err := xlsx.OpenFile("template.xlsx")
  62. if err != nil {
  63. log.Println(err)
  64. return
  65. }
  66. //生成第一个sheet信息
  67. sh := xf.Sheets[0]
  68. for i, field := range Fields {
  69. for k, row := range sh.Rows {
  70. if k > 2+i {
  71. style := (*row).Cells[1].GetStyle()
  72. style.Font.Color = "000000"
  73. (*row).Cells[0].SetString(field)
  74. (*row).Cells[1].SetStyle(style)
  75. (*row).Cells[2].SetStyle(style)
  76. (*row).Cells[3].SetStyle(style)
  77. (*row).Cells[4].SetStyle(style)
  78. if Compares[field] == nil {
  79. (*row).Cells[1].SetInt(0)
  80. (*row).Cells[2].SetInt(0)
  81. (*row).Cells[3].SetInt(0)
  82. (*row).Cells[4].SetInt(0)
  83. } else {
  84. (*row).Cells[1].SetInt(Compares[field].PreExtNum)
  85. (*row).Cells[2].SetInt(Compares[field].NewExtNum)
  86. (*row).Cells[3].SetInt(Compares[field].EqNum)
  87. (*row).Cells[4].SetInt(Compares[field].NEqNum)
  88. }
  89. }
  90. sh.Rows[k] = row
  91. }
  92. }
  93. //生成信息sheet
  94. for _, field := range Fields {
  95. sh, _ := xf.AddSheet(field)
  96. rowh := sh.AddRow()
  97. rowh.AddCell().SetString("id")
  98. rowh.AddCell().SetString("preval")
  99. rowh.AddCell().SetString("newval")
  100. rowh.AddCell().SetString("url")
  101. tmp := FieldData[field]
  102. for k, v := range tmp {
  103. if v.NewVal != v.PreVal {
  104. row := sh.AddRow()
  105. row.AddCell().SetString(k)
  106. row.AddCell().SetString(v.PreVal)
  107. row.AddCell().SetString(v.NewVal)
  108. row.AddCell().SetString(fmt.Sprintf(Url, qu.CommonEncodeArticle("content", v.Id)))
  109. }
  110. }
  111. }
  112. err = xf.Save("result.xlsx")
  113. if err != nil {
  114. log.Println("保存xlsx失败:", err)
  115. return
  116. }
  117. log.Println("xlsx保存成功")
  118. }
  119. func getVersionData() {
  120. query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(Sid), "$lte": bson.ObjectIdHex(Eid)}}
  121. log.Println(qu.ObjToString(SysConfig["prec"]), query)
  122. list1, _ := Premgo.Find(qu.ObjToString(SysConfig["prec"]), query, nil, `{`+FieldsQuery+`}`, false, -1, -1)
  123. for _, v := range *list1 {
  124. for _, key := range Fields {
  125. rd := FieldData[key]
  126. if rd == nil {
  127. rd = map[string]*Data{}
  128. }
  129. if v[key] == nil && strings.TrimSpace(qu.ObjToString(v[key])) == "" {
  130. continue
  131. }
  132. rd[qu.BsonIdToSId(v["_id"])] = &Data{
  133. Id: qu.BsonIdToSId(v["_id"]),
  134. PreVal: fmt.Sprint(v[key]),
  135. }
  136. FieldData[key] = rd
  137. }
  138. }
  139. log.Println("pre version 加载完成")
  140. list2, _ := Newmgo.Find(qu.ObjToString(SysConfig["newc"]), query, nil, `{`+FieldsQuery+`}`, false, -1, -1)
  141. for _, v := range *list2 {
  142. for _, field := range Fields {
  143. rd := FieldData[field]
  144. if rd == nil {
  145. rd = map[string]*Data{}
  146. }
  147. if v[field] == nil && strings.TrimSpace(qu.ObjToString(v[field])) == "" {
  148. continue
  149. }
  150. _id := qu.BsonIdToSId(v["_id"])
  151. tmp := rd[_id]
  152. if tmp != nil {
  153. tmp.NewVal = fmt.Sprint(v[field])
  154. rd[_id] = tmp
  155. } else {
  156. rd[_id] = &Data{
  157. NewVal: fmt.Sprint(v[field]),
  158. }
  159. }
  160. FieldData[field] = rd
  161. }
  162. }
  163. log.Println("new version 加载完成")
  164. for k, v := range FieldData {
  165. cp := &Compare{Field: k}
  166. for _, d := range v {
  167. if d.NewVal != "" && d.PreVal != "" {
  168. if d.NewVal == d.PreVal {
  169. cp.EqNum++
  170. } else {
  171. cp.NEqNum++
  172. }
  173. cp.PreExtNum++
  174. cp.NewExtNum++
  175. } else {
  176. if d.NewVal == "" {
  177. cp.NewNilnum++
  178. if d.PreVal != "" {
  179. cp.NEqNum++
  180. cp.PreExtNum++
  181. }
  182. }
  183. if d.PreVal == "" {
  184. cp.PreNilnum++
  185. if d.NewVal != "" {
  186. cp.NewExtNum++
  187. cp.NEqNum++
  188. }
  189. }
  190. }
  191. }
  192. Compares[k] = cp
  193. }
  194. }
  // BidData is one record reduced to its id and the fields used by biaozhucompare.
  type BidData struct {
  	// id is the record id as a string.
  	id string
  	// key maps field name to the field's value.
  	key map[string]interface{}
  }
  199. type BidCom struct {
  200. Val []int
  201. Ids []map[string]interface{}
  202. }
  203. //标注正确率统计
  204. func biaozhucompare() {
  205. exts, _ := Newmgo.Find("bid_v3", `{}`, `{"_id":1}`, nil, false, -1, -1)
  206. extDatas := []BidData{}
  207. for _, v := range *exts {
  208. key := map[string]interface{}{
  209. "projectname": v["projectname"],
  210. "projectcode": v["projectcode"],
  211. "buyer": v["buyer"],
  212. "budget": qu.Float64All(v["budget"]),
  213. "bidamount": qu.Float64All(v["bidamount"]),
  214. "agency": v["agency"],
  215. "buyerperson": v["buyerperson"],
  216. "buyertel": v["buyertel"],
  217. }
  218. ext := BidData{
  219. id: qu.BsonIdToSId(v["_id"]),
  220. key: key,
  221. }
  222. extDatas = append(extDatas, ext)
  223. }
  224. log.Println("exts ok")
  225. bzs, _ := Newmgo.Find("bid_biaozhuid", `{}`, `{"_id":1}`, nil, false, -1, -1)
  226. bzDatas := []BidData{}
  227. for _, v := range *bzs {
  228. bidamount := float64(0)
  229. if bigprices, ok := v["bigprice"].([]interface{}); ok {
  230. bidamount = qu.Float64All(bigprices[0])
  231. }
  232. key := map[string]interface{}{
  233. "projectname": qu.ObjToString(v["projectname"]),
  234. "projectcode": qu.ObjToString(v["projectcode"]), //qu.If(qu.ObjToString(v["t_bidno"]) == "", qu.ObjToString(v["b_projectno"]), qu.ObjToString(v["t_bidno"])),
  235. "buyer": qu.ObjToString(v["buyer"]), // qu.If(qu.ObjToString(v["t_buyer"]) == "", qu.ObjToString(v["b_buyer"]), qu.ObjToString(v["t_buyer"])),
  236. "budget": qu.Float64All(qu.ObjToString(v["budget"])), // qu.Float64All(qu.ObjToString(v["t_budget"])),
  237. "bidamount": bidamount,
  238. "agency": qu.ObjToString(v["agency"]),
  239. "buyerperson": qu.ObjToString(v["buyerperson"]),
  240. "buyertel": qu.ObjToString(v["buyertel"]),
  241. }
  242. bz := BidData{
  243. id: qu.BsonIdToSId(v["_id"]),
  244. key: key,
  245. }
  246. bzDatas = append(bzDatas, bz)
  247. }
  248. log.Println("bzs ok")
  249. bcoms := map[string]*BidCom{}
  250. for _, ext := range extDatas {
  251. for _, bz := range bzDatas {
  252. if bz.id == ext.id {
  253. for key, val := range ext.key {
  254. // if key == "budget" {
  255. // log.Println(key, ext.key[key], ";;;;;", bz.key[key])
  256. // }
  257. if qu.ObjToString(val) != "" || qu.Float64All(val) > 0 {
  258. bcom := bcoms[key]
  259. if bcom == nil {
  260. bcom = &BidCom{
  261. Val: []int{0, 0},
  262. Ids: []map[string]interface{}{},
  263. }
  264. }
  265. if val == bz.key[key] {
  266. bcom.Val[0] += 1
  267. } else {
  268. bcom.Val[1] += 1
  269. tmp := map[string]interface{}{
  270. "id": ext.id,
  271. "ext": val,
  272. "bz": bz.key[key],
  273. }
  274. bcom.Ids = append(bcom.Ids, tmp)
  275. }
  276. bcoms[key] = bcom
  277. }
  278. }
  279. break
  280. }
  281. }
  282. }
  283. xl := xlsx.NewFile()
  284. sh, _ := xl.AddSheet("统计")
  285. h := sh.AddRow()
  286. h.AddCell().SetString("field")
  287. h.AddCell().SetString("相同")
  288. h.AddCell().SetString("不同")
  289. for k, v := range bcoms {
  290. row := sh.AddRow()
  291. row.AddCell().SetString(k)
  292. row.AddCell().SetInt(v.Val[0])
  293. row.AddCell().SetInt(v.Val[1])
  294. ksh, _ := xl.AddSheet(k)
  295. rh := ksh.AddRow()
  296. rh.AddCell().SetString("id")
  297. rh.AddCell().SetString("标注")
  298. rh.AddCell().SetString("抽取")
  299. rh.AddCell().SetString("url")
  300. for _, v := range v.Ids {
  301. rw := ksh.AddRow()
  302. rw.AddCell().SetString(qu.ObjToString(v["id"]))
  303. rw.AddCell().SetString(fmt.Sprint(v["bz"]))
  304. rw.AddCell().SetString(fmt.Sprint(v["ext"]))
  305. rw.AddCell().SetString(fmt.Sprintf(Url, qu.CommonEncodeArticle("content", qu.ObjToString(v["id"]))))
  306. }
  307. log.Println(k, v.Val)
  308. }
  309. xl.Save("ext_bz.xlsx")
  310. }