main.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. /*
  2. 抽取结果对比
  3. */
  4. package main
  5. import (
  6. "flag"
  7. "fmt"
  8. "jy/mongodbutil"
  9. "log"
  10. qu "qfw/util"
  11. "github.com/tealeg/xlsx"
  12. "gopkg.in/mgo.v2/bson"
  13. )
  14. var (
  15. SysConfig map[string]interface{}
  16. Premgo *mongodbutil.Pool //上个版本库
  17. Newmgo *mongodbutil.Pool //当前版本库
  18. FieldData map[string]map[string]*Data
  19. Compares map[string]*Compare
  20. Sid, Eid string
  21. Fields []string
  22. FieldsQuery string
  23. Url = "https://www.jianyu360.com/article/content/%s.html"
  24. )
  25. type Compare struct {
  26. Field string //属性
  27. PreExtNum, NewExtNum int //上个版、当前版有值数量
  28. PreNilnum, NewNilnum int //上个版、当前版无值数量
  29. EqNum, NEqNum int //相等、不等数据量
  30. }
  31. type Data struct {
  32. Id string
  33. PreVal, NewVal string
  34. }
  35. func init() {
  36. flag.StringVar(&Sid, "sid", "5d348c0ca5cb26b9b76a4bb8", "开始id")
  37. flag.StringVar(&Eid, "eid", "5d34ae22a5cb26b9b7850b43", "结束id")
  38. flag.Parse()
  39. qu.ReadConfig(&SysConfig)
  40. Premgo = mongodbutil.MgoFactory(1, 3, 120, qu.ObjToString(SysConfig["premgo"]), qu.ObjToString(SysConfig["predb"]))
  41. Newmgo = mongodbutil.MgoFactory(1, 3, 120, qu.ObjToString(SysConfig["newmgo"]), qu.ObjToString(SysConfig["newdb"]))
  42. tmp, _ := SysConfig["fields"].([]interface{})
  43. for k, v := range tmp {
  44. Fields = append(Fields, qu.ObjToString(v))
  45. if k < (len(tmp) - 1) {
  46. FieldsQuery += `"` + qu.ObjToString(v) + `":1,`
  47. } else {
  48. FieldsQuery += `"` + qu.ObjToString(v) + `":1`
  49. }
  50. }
  51. FieldData = map[string]map[string]*Data{}
  52. Compares = map[string]*Compare{}
  53. }
  54. func main() {
  55. getVersionData()
  56. createXlsx()
  57. //biaozhucompare()
  58. }
  59. func createXlsx() {
  60. xf, err := xlsx.OpenFile("template.xlsx")
  61. if err != nil {
  62. log.Println(err)
  63. return
  64. }
  65. //生成第一个sheet信息
  66. sh := xf.Sheets[0]
  67. for i, field := range Fields {
  68. for k, row := range sh.Rows {
  69. if k > 2+i {
  70. style := (*row).Cells[1].GetStyle()
  71. style.Font.Color = "000000"
  72. (*row).Cells[0].SetString(field)
  73. (*row).Cells[1].SetInt(Compares[field].PreExtNum)
  74. (*row).Cells[1].SetStyle(style)
  75. (*row).Cells[2].SetInt(Compares[field].NewExtNum)
  76. (*row).Cells[2].SetStyle(style)
  77. (*row).Cells[3].SetInt(Compares[field].EqNum)
  78. (*row).Cells[3].SetStyle(style)
  79. (*row).Cells[4].SetInt(Compares[field].NEqNum)
  80. (*row).Cells[4].SetStyle(style)
  81. }
  82. sh.Rows[k] = row
  83. }
  84. }
  85. //生成信息sheet
  86. for _, field := range Fields {
  87. sh, _ := xf.AddSheet(field)
  88. rowh := sh.AddRow()
  89. rowh.AddCell().SetString("id")
  90. rowh.AddCell().SetString("preval")
  91. rowh.AddCell().SetString("newval")
  92. rowh.AddCell().SetString("url")
  93. tmp := FieldData[field]
  94. for k, v := range tmp {
  95. if v.NewVal != v.PreVal {
  96. row := sh.AddRow()
  97. row.AddCell().SetString(k)
  98. row.AddCell().SetString(v.PreVal)
  99. row.AddCell().SetString(v.NewVal)
  100. row.AddCell().SetString(fmt.Sprintf(Url, qu.CommonEncodeArticle("content", v.Id)))
  101. }
  102. }
  103. }
  104. err = xf.Save("result.xlsx")
  105. if err != nil {
  106. log.Println("保存xlsx失败:", err)
  107. return
  108. }
  109. log.Println("xlsx保存成功")
  110. }
  111. func getVersionData() {
  112. query := bson.M{"_id": bson.M{"$gte": bson.ObjectIdHex(Sid), "$lte": bson.ObjectIdHex(Eid)}}
  113. log.Println(qu.ObjToString(SysConfig["prec"]), query)
  114. list1, _ := Premgo.Find(qu.ObjToString(SysConfig["prec"]), query, nil, `{`+FieldsQuery+`}`, false, -1, -1)
  115. for _, v := range *list1 {
  116. for _, key := range Fields {
  117. rd := FieldData[key]
  118. if rd == nil {
  119. rd = map[string]*Data{}
  120. }
  121. rd[qu.BsonIdToSId(v["_id"])] = &Data{
  122. Id: qu.BsonIdToSId(v["_id"]),
  123. PreVal: fmt.Sprint(v[key]),
  124. }
  125. FieldData[key] = rd
  126. }
  127. }
  128. log.Println("pre version 加载完成")
  129. list2, _ := Newmgo.Find(qu.ObjToString(SysConfig["newc"]), query, nil, `{`+FieldsQuery+`}`, false, -1, -1)
  130. for _, v := range *list2 {
  131. for _, field := range Fields {
  132. rd := FieldData[field]
  133. if rd == nil {
  134. rd = map[string]*Data{}
  135. }
  136. _id := qu.BsonIdToSId(v["_id"])
  137. tmp := rd[_id]
  138. if tmp != nil {
  139. tmp.NewVal = fmt.Sprint(v[field])
  140. rd[_id] = tmp
  141. } else {
  142. rd[_id] = &Data{
  143. NewVal: fmt.Sprint(v[field]),
  144. }
  145. }
  146. FieldData[field] = rd
  147. }
  148. }
  149. log.Println("new version 加载完成")
  150. for k, v := range FieldData {
  151. cp := &Compare{Field: k}
  152. for _, d := range v {
  153. if d.NewVal != "" && d.PreVal != "" {
  154. if d.NewVal == d.PreVal {
  155. cp.EqNum++
  156. } else {
  157. cp.NEqNum++
  158. }
  159. cp.PreExtNum++
  160. cp.NewExtNum++
  161. } else {
  162. if d.NewVal == "" {
  163. cp.NewNilnum++
  164. if d.PreVal != "" {
  165. cp.NEqNum++
  166. cp.PreExtNum++
  167. }
  168. }
  169. if d.PreVal == "" {
  170. cp.PreNilnum++
  171. if d.NewVal != "" {
  172. cp.NewExtNum++
  173. cp.NEqNum++
  174. }
  175. }
  176. }
  177. }
  178. Compares[k] = cp
  179. }
  180. }
  181. type BidData struct {
  182. id string
  183. key map[string]interface{}
  184. }
  185. type BidCom struct {
  186. Val []int
  187. Ids []map[string]interface{}
  188. }
  189. //标注正确率统计
  190. func biaozhucompare() {
  191. exts, _ := Newmgo.Find("bid_v3", `{}`, `{"_id":1}`, nil, false, -1, -1)
  192. extDatas := []BidData{}
  193. for _, v := range *exts {
  194. key := map[string]interface{}{
  195. "projectname": v["projectname"],
  196. "projectcode": v["projectcode"],
  197. "buyer": v["buyer"],
  198. "budget": qu.Float64All(v["budget"]),
  199. "bidamount": qu.Float64All(v["bidamount"]),
  200. "agency": v["agency"],
  201. "buyerperson": v["buyerperson"],
  202. "buyertel": v["buyertel"],
  203. }
  204. ext := BidData{
  205. id: qu.BsonIdToSId(v["_id"]),
  206. key: key,
  207. }
  208. extDatas = append(extDatas, ext)
  209. }
  210. log.Println("exts ok")
  211. bzs, _ := Newmgo.Find("bid_biaozhuid", `{}`, `{"_id":1}`, nil, false, -1, -1)
  212. bzDatas := []BidData{}
  213. for _, v := range *bzs {
  214. bidamount := float64(0)
  215. if bigprices, ok := v["bigprice"].([]interface{}); ok {
  216. bidamount = qu.Float64All(bigprices[0])
  217. }
  218. key := map[string]interface{}{
  219. "projectname": qu.ObjToString(v["projectname"]),
  220. "projectcode": qu.ObjToString(v["projectcode"]), //qu.If(qu.ObjToString(v["t_bidno"]) == "", qu.ObjToString(v["b_projectno"]), qu.ObjToString(v["t_bidno"])),
  221. "buyer": qu.ObjToString(v["buyer"]), // qu.If(qu.ObjToString(v["t_buyer"]) == "", qu.ObjToString(v["b_buyer"]), qu.ObjToString(v["t_buyer"])),
  222. "budget": qu.Float64All(qu.ObjToString(v["budget"])), // qu.Float64All(qu.ObjToString(v["t_budget"])),
  223. "bidamount": bidamount,
  224. "agency": qu.ObjToString(v["agency"]),
  225. "buyerperson": qu.ObjToString(v["buyerperson"]),
  226. "buyertel": qu.ObjToString(v["buyertel"]),
  227. }
  228. bz := BidData{
  229. id: qu.BsonIdToSId(v["_id"]),
  230. key: key,
  231. }
  232. bzDatas = append(bzDatas, bz)
  233. }
  234. log.Println("bzs ok")
  235. bcoms := map[string]*BidCom{}
  236. for _, ext := range extDatas {
  237. for _, bz := range bzDatas {
  238. if bz.id == ext.id {
  239. for key, val := range ext.key {
  240. // if key == "budget" {
  241. // log.Println(key, ext.key[key], ";;;;;", bz.key[key])
  242. // }
  243. if qu.ObjToString(val) != "" || qu.Float64All(val) > 0 {
  244. bcom := bcoms[key]
  245. if bcom == nil {
  246. bcom = &BidCom{
  247. Val: []int{0, 0},
  248. Ids: []map[string]interface{}{},
  249. }
  250. }
  251. if val == bz.key[key] {
  252. bcom.Val[0] += 1
  253. } else {
  254. bcom.Val[1] += 1
  255. tmp := map[string]interface{}{
  256. "id": ext.id,
  257. "ext": val,
  258. "bz": bz.key[key],
  259. }
  260. bcom.Ids = append(bcom.Ids, tmp)
  261. }
  262. bcoms[key] = bcom
  263. }
  264. }
  265. break
  266. }
  267. }
  268. }
  269. xl := xlsx.NewFile()
  270. sh, _ := xl.AddSheet("统计")
  271. h := sh.AddRow()
  272. h.AddCell().SetString("field")
  273. h.AddCell().SetString("相同")
  274. h.AddCell().SetString("不同")
  275. for k, v := range bcoms {
  276. row := sh.AddRow()
  277. row.AddCell().SetString(k)
  278. row.AddCell().SetInt(v.Val[0])
  279. row.AddCell().SetInt(v.Val[1])
  280. ksh, _ := xl.AddSheet(k)
  281. rh := ksh.AddRow()
  282. rh.AddCell().SetString("id")
  283. rh.AddCell().SetString("标注")
  284. rh.AddCell().SetString("抽取")
  285. rh.AddCell().SetString("url")
  286. for _, v := range v.Ids {
  287. rw := ksh.AddRow()
  288. rw.AddCell().SetString(qu.ObjToString(v["id"]))
  289. rw.AddCell().SetString(fmt.Sprint(v["bz"]))
  290. rw.AddCell().SetString(fmt.Sprint(v["ext"]))
  291. rw.AddCell().SetString(fmt.Sprintf(Url, qu.CommonEncodeArticle("content", qu.ObjToString(v["id"]))))
  292. }
  293. log.Println(k, v.Val)
  294. }
  295. xl.Save("ext_bz.xlsx")
  296. }