extract.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642
  1. package extract
  2. import (
  3. "encoding/json"
  4. "jy/clear"
  5. db "jy/mongodbutil"
  6. "jy/pretreated"
  7. ju "jy/util"
  8. "log"
  9. qu "qfw/util"
  10. "strings"
  11. "sync"
  12. "time"
  13. "gopkg.in/mgo.v2/bson"
  14. )
  15. var (
  16. lock sync.RWMutex
  17. cut = ju.NewCut() //获取正文并清理
  18. ExtLogs map[*TaskInfo][]map[string]interface{} //抽取日志
  19. TaskList map[string]*ExtractTask //任务列表
  20. saveLimit = 200 //抽取日志批量保存
  21. Fields = `{"title":1,"detail":1,"contenthtml":1,"href":1,"spidercode":1,"toptype":1,"area":1,"city":1}`
  22. )
  23. //启动抽取
  24. func StartExtractTaskId(taskId string) bool {
  25. ext := TaskList[taskId]
  26. if ext == nil {
  27. ext = &ExtractTask{}
  28. ext.Id = taskId
  29. ext.IsRun = true
  30. ext.InitTaskInfo()
  31. ext.TaskInfo.DB = db.MgoFactory(1, 3, 120, ext.TaskInfo.FromDbAddr, ext.TaskInfo.FromDB)
  32. ext.InitRulePres()
  33. ext.InitRuleBacks()
  34. ext.InitRuleCore()
  35. ext.InitTag()
  36. ext.InitClearFn()
  37. //只启动一次taskId
  38. go RunExtractTask(ext)
  39. }
  40. ext.IsRun = true
  41. TaskList[taskId] = ext
  42. return true
  43. }
  44. //停止抽取
  45. func StopExtractTaskId(taskId string) bool {
  46. ext := TaskList[taskId]
  47. if ext != nil {
  48. ext.IsRun = false
  49. TaskList[taskId] = ext
  50. }
  51. //更新task.s_extlastid
  52. db.Mgo.UpdateById("task", taskId, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`)
  53. return true
  54. }
  55. //开始抽取
  56. func RunExtractTask(ext *ExtractTask) {
  57. if !ext.IsRun {
  58. return
  59. }
  60. query := bson.M{"_id": bson.M{"$gt": bson.ObjectIdHex(ext.TaskInfo.LastExtId)}}
  61. list, _ := ext.TaskInfo.DB.Find(ext.TaskInfo.FromColl, query, nil, Fields, false, -1, -1)
  62. for _, v := range *list {
  63. if !ext.IsRun {
  64. break
  65. }
  66. j := PreInfo(v)
  67. ext.TaskInfo.ProcessPool <- true
  68. go ext.ExtractProcess(j)
  69. ext.TaskInfo.LastExtId = qu.BsonIdToSId(v["_id"])
  70. time.Sleep(1 * time.Second)
  71. }
  72. //更新task.s_extlastid
  73. db.Mgo.UpdateById("task", ext.Id, `{"$set":{"s_extlastid":"`+ext.TaskInfo.LastExtId+`"}}`)
  74. time.AfterFunc(30*time.Minute, func() { RunExtractTask(ext) })
  75. }
  76. //信息预处理
  77. func PreInfo(doc map[string]interface{}) *ju.Job {
  78. detail := ""
  79. d1, _ := doc["detail"].(string)
  80. d2, _ := doc["contenthtml"].(string)
  81. if len(d1) >= len(d2) || d2 == "" {
  82. detail = d1
  83. } else {
  84. detail = d2
  85. }
  86. detail = ju.CutLableStr(detail)
  87. detail = cut.ClearHtml(detail)
  88. doc["detail"] = detail
  89. href := qu.ObjToString(doc["href"])
  90. if strings.HasPrefix(href, "http://") {
  91. href = href[7:]
  92. } else if strings.HasPrefix(href, "https://") {
  93. href = href[8:]
  94. }
  95. pos := strings.Index(href, "/")
  96. if pos > 0 {
  97. href = href[:pos]
  98. }
  99. doc["domain"] = href
  100. toptype := qu.ObjToString(doc["toptype"])
  101. if qu.ObjToString(doc["type"]) == "bid" {
  102. toptype = "结果"
  103. }
  104. if toptype == "" {
  105. toptype = "*"
  106. }
  107. j := &ju.Job{
  108. SourceMid: qu.BsonIdToSId(doc["_id"]),
  109. Category: toptype,
  110. Content: qu.ObjToString(doc["detail"]),
  111. SpiderCode: qu.ObjToString(doc["spidercode"]),
  112. Domain: qu.ObjToString(doc["domain"]),
  113. Href: qu.ObjToString(doc["href"]),
  114. Title: qu.ObjToString(doc["title"]),
  115. Data: &doc,
  116. City: qu.ObjToString(doc["city"]),
  117. Province: qu.ObjToString(doc["area"]),
  118. Result: map[string][]*ju.ExtField{},
  119. }
  120. pretreated.AnalyStart(j)
  121. return j
  122. }
  123. //抽取
  124. func (e *ExtractTask) ExtractProcess(j *ju.Job) {
  125. qu.Catch()
  126. qu.Try(func() {
  127. doc := *j.Data
  128. //全局前置规则,结果覆盖doc属性
  129. for _, v := range e.RulePres {
  130. doc = ExtRegPre(doc, j, v, e.TaskInfo)
  131. }
  132. //log.Println("全局前置规则", doc)
  133. //抽取规则
  134. for _, vc := range e.RuleCores {
  135. tmp := ju.DeepCopy(doc).(map[string]interface{})
  136. //是否进入逻辑
  137. if !ju.Logic(vc.LuaLogic, tmp) {
  138. continue
  139. }
  140. //抽取-前置规则
  141. for _, v := range vc.RulePres {
  142. tmp = ExtRegPre(tmp, j, v, e.TaskInfo)
  143. }
  144. //log.Println("抽取-前置规则", tmp)
  145. //抽取-规则
  146. for _, v := range vc.RuleCores {
  147. ExtRegCore(vc.ExtFrom, tmp, j, v, e)
  148. }
  149. //log.Println("抽取-规则", tmp)
  150. //抽取-后置规则
  151. for _, v := range vc.RuleBacks {
  152. ExtRegBack(j, v, e.TaskInfo)
  153. }
  154. //log.Println("抽取-后置规则", tmp)
  155. //全局后置规则
  156. for _, v := range e.RuleBacks {
  157. ExtRegBack(j, v, e.TaskInfo)
  158. }
  159. }
  160. //函数清理
  161. for key, val := range j.Result {
  162. for _, v := range val {
  163. data := clear.DoClearFn(e.ClearFn[key], []interface{}{v.Value, j.Content})
  164. v.Value = data[0]
  165. }
  166. }
  167. bs, _ := json.Marshal(j.Result)
  168. log.Println("抽取结果", j.SourceMid, string(bs))
  169. //分析抽取结果并保存 todo
  170. AnalysisSaveResult(j.Data, j.Result, e.TaskInfo)
  171. }, func(err interface{}) {
  172. log.Println(err)
  173. <-e.TaskInfo.ProcessPool
  174. })
  175. <-e.TaskInfo.ProcessPool
  176. }
  177. //前置过滤
  178. func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInfo) map[string]interface{} {
  179. before := ju.DeepCopy(doc).(map[string]interface{})
  180. extinfo := map[string]interface{}{}
  181. if in.IsLua {
  182. lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
  183. if j != nil {
  184. lua.Block = j.Block
  185. }
  186. extinfo = lua.RunScript("pre")
  187. for k, v := range extinfo { //结果覆盖原doc
  188. doc[k] = v
  189. }
  190. AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志
  191. } else {
  192. key := qu.If(in.Field == "", "detail", in.Field).(string)
  193. text := qu.ObjToString(doc[key])
  194. extinfo[key] = in.RegPreBac.Reg.ReplaceAllString(text, "")
  195. doc[key] = extinfo[key] //结果覆盖原doc
  196. AddExtLog("prereplace", j.SourceMid, before, extinfo, in, t) //抽取日志
  197. }
  198. return doc
  199. }
  200. //抽取-规则
  201. func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) {
  202. if in.IsLua {
  203. lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
  204. if in.IsHasFields { //lua脚本配置有属性字段
  205. lua.KvMap = getKvByLuaFields(j, in, et.Tag)
  206. } else {
  207. lua.KvMap = map[string][]map[string]interface{}{}
  208. }
  209. lua.Block = j.Block
  210. extinfo := lua.RunScript("core")
  211. for k, v := range extinfo {
  212. if j.Result[k] == nil {
  213. j.Result[k] = [](*ju.ExtField){}
  214. }
  215. if tmps, ok := v.([]map[string]interface{}); ok {
  216. for _, tmp := range tmps {
  217. j.Result[k] = append(j.Result[k],
  218. &ju.ExtField{k, qu.ObjToString(tmp["key"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), extfrom, tmp["value"]})
  219. }
  220. }
  221. }
  222. if len(extinfo) > 0 {
  223. AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
  224. }
  225. } else {
  226. //全文正则
  227. text := qu.ObjToString(doc[extfrom])
  228. if in.Field != "" {
  229. extinfo := extRegCoreToResult(extfrom, text, j, in)
  230. if len(extinfo) > 0 {
  231. AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
  232. }
  233. }
  234. }
  235. }
  236. //lua脚本根据属性设置提取kv值
  237. func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string][]map[string]interface{} {
  238. kvmap := map[string][]map[string]interface{}{}
  239. for _, vv := range in.LFields {
  240. field := qu.ObjToString(vv)
  241. tags := t[qu.ObjToString(vv)] //获取对应标签库
  242. for _, bl := range j.Block {
  243. //冒号kv
  244. if bl.ColonKV != nil {
  245. kvs := bl.ColonKV.Kvs
  246. kvs2 := bl.ColonKV.Kvs_2
  247. for _, tag := range tags {
  248. for _, kv := range kvs {
  249. if tag.Type == "string" {
  250. if kv.Key == tag.Key {
  251. text := ju.TrimLRSpace(kv.Value, "")
  252. if text != "" {
  253. kvmap[field] = append(kvmap[field], map[string]interface{}{
  254. "value": text,
  255. "type": "colon1",
  256. "field": field,
  257. "key": tag.Key,
  258. "matchtype": "tag_string",
  259. })
  260. }
  261. break
  262. }
  263. } else if tag.Type == "regexp" {
  264. if tag.Reg.MatchString(kv.Key) {
  265. text := ju.TrimLRSpace(kv.Value, "")
  266. if text != "" {
  267. kvmap[field] = append(kvmap[field], map[string]interface{}{
  268. "value": text,
  269. "type": "colon1",
  270. "field": field,
  271. "key": tag.Key,
  272. "matchtype": "tag_regexp",
  273. })
  274. }
  275. break
  276. }
  277. }
  278. }
  279. for _, kv := range kvs2 {
  280. if tag.Type == "string" {
  281. if kv.Key == tag.Key {
  282. text := ju.TrimLRSpace(kv.Value, "")
  283. if text != "" {
  284. kvmap[field] = append(kvmap[field], map[string]interface{}{
  285. "value": text,
  286. "type": "colon2",
  287. "field": field,
  288. "key": tag.Key,
  289. "matchtype": "tag_string",
  290. })
  291. }
  292. break
  293. }
  294. } else if tag.Type == "regexp" {
  295. if tag.Reg.MatchString(kv.Key) {
  296. text := ju.TrimLRSpace(kv.Value, "")
  297. if text != "" {
  298. kvmap[field] = append(kvmap[field], map[string]interface{}{
  299. "value": text,
  300. "type": "colon2",
  301. "field": field,
  302. "key": tag.Key,
  303. "matchtype": "tag_regexp",
  304. })
  305. }
  306. break
  307. }
  308. }
  309. }
  310. }
  311. }
  312. //空格kv
  313. if bl.SpaceKV != nil {
  314. kvs := bl.SpaceKV.Kvs
  315. for _, tag := range tags {
  316. for _, kv := range kvs {
  317. if tag.Type == "string" {
  318. if kv.Key == tag.Key {
  319. text := ju.TrimLRSpace(kv.Value, "")
  320. if text != "" {
  321. kvmap[field] = append(kvmap[field], map[string]interface{}{
  322. "value": text,
  323. "type": "space",
  324. "field": field,
  325. "key": tag.Key,
  326. "matchtype": "tag_string",
  327. })
  328. }
  329. break
  330. }
  331. } else if tag.Type == "regexp" {
  332. if tag.Reg.MatchString(kv.Key) {
  333. text := ju.TrimLRSpace(kv.Value, "")
  334. if text != "" {
  335. kvmap[field] = append(kvmap[field], map[string]interface{}{
  336. "value": text,
  337. "type": "space",
  338. "field": field,
  339. "key": tag.Key,
  340. "matchtype": "tag_regexp",
  341. })
  342. }
  343. break
  344. }
  345. }
  346. }
  347. }
  348. }
  349. //表格kv
  350. if bl.TableKV != nil {
  351. kv := bl.TableKV.Kv
  352. for _, tag := range tags {
  353. for k, val := range kv {
  354. if tag.Type == "string" {
  355. if k == tag.Key {
  356. text := ju.TrimLRSpace(val, "")
  357. if text != "" {
  358. kvmap[field] = append(kvmap[field], map[string]interface{}{
  359. "value": text,
  360. "type": "table",
  361. "field": field,
  362. "key": tag.Key,
  363. "matchtype": "tag_string",
  364. })
  365. }
  366. break
  367. }
  368. } else if tag.Type == "regexp" {
  369. if tag.Reg.MatchString(k) {
  370. text := ju.TrimLRSpace(val, "")
  371. if text != "" {
  372. kvmap[field] = append(kvmap[field], map[string]interface{}{
  373. "value": text,
  374. "type": "table",
  375. "field": field,
  376. "key": tag.Key,
  377. "matchtype": "tag_regexp",
  378. })
  379. }
  380. break
  381. }
  382. }
  383. }
  384. }
  385. }
  386. }
  387. }
  388. return kvmap
  389. }
  390. //正则提取结果
  391. func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[string]interface{} {
  392. extinfo := map[string]interface{}{}
  393. if v.RegCore.Bextract { //正则是两部分的,可以直接抽取的(含下划线)
  394. apos := v.RegCore.Reg.FindAllStringSubmatchIndex(text, -1)
  395. if len(apos) > 0 {
  396. pos := apos[0]
  397. for k, p := range v.RegCore.ExtractPos {
  398. if len(pos) > p {
  399. if pos[p] == -1 || pos[p+1] == -1 {
  400. continue
  401. }
  402. val := text[pos[p]:pos[p+1]]
  403. extinfo[k] = map[string]interface{}{
  404. "field": v.Field,
  405. "key": v.Code,
  406. "type": "regexp",
  407. "matchtype": "regcontent",
  408. "extfrom": extfrom,
  409. "value": val,
  410. }
  411. if val != "" {
  412. if j.Result[v.Field] == nil {
  413. j.Result[k] = [](*ju.ExtField){}
  414. }
  415. j.Result[k] = append(j.Result[k], &ju.ExtField{k, v.Code, "regexp", "regcontent", extfrom, val})
  416. }
  417. }
  418. }
  419. }
  420. } else {
  421. val := v.RegCore.Reg.ReplaceAllString(text, "")
  422. if val != "" {
  423. extinfo[v.Field] = map[string]interface{}{
  424. "field": v.Field,
  425. "key": v.Code,
  426. "type": "regexp",
  427. "matchtype": "regcontent",
  428. "extfrom": extfrom,
  429. "value": val,
  430. }
  431. if j.Result[v.Field] == nil {
  432. j.Result[v.Field] = [](*ju.ExtField){}
  433. }
  434. j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{v.Field, v.Code, "regexp", "regcontent", extfrom, val})
  435. }
  436. }
  437. return extinfo
  438. }
  439. //后置过滤
  440. func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
  441. if in.IsLua {
  442. result := getResultMapForLua(j)
  443. lua := ju.LuaScript{Code: in.Code, Name: in.Name, Result: result, Script: in.RuleText}
  444. if j != nil {
  445. lua.Block = j.Block
  446. }
  447. extinfo := lua.RunScript("back")
  448. for k, v := range extinfo {
  449. if tmps, ok := v.([]map[string]interface{}); ok {
  450. j.Result[k] = [](*ju.ExtField){}
  451. for _, tmp := range tmps {
  452. j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["key"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"]})
  453. }
  454. }
  455. }
  456. if len(extinfo) > 0 {
  457. AddExtLog("clear", j.SourceMid, result, extinfo, in, t) //抽取日志
  458. }
  459. } else {
  460. extinfo := map[string]interface{}{}
  461. if in.Field != "" && j.Result[in.Field] != nil {
  462. tmp := j.Result[in.Field]
  463. exts := []interface{}{}
  464. for k, v := range tmp {
  465. text := qu.ObjToString(v.Value)
  466. if text != "" {
  467. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  468. }
  469. j.Result[in.Field][k].Value = text
  470. exts = append(exts, map[string]interface{}{
  471. "field": v.Field,
  472. "key": v.Key,
  473. "type": v.Type,
  474. "matchtype": v.MatchType,
  475. "extfrom": v.ExtFrom,
  476. "value": text,
  477. })
  478. }
  479. extinfo[in.Field] = exts
  480. if len(extinfo) > 0 {
  481. AddExtLog("clear", j.SourceMid, tmp, extinfo, in, t) //抽取日志
  482. }
  483. } else {
  484. for key, tmp := range j.Result {
  485. exts := []interface{}{}
  486. for k, v := range tmp {
  487. text := qu.ObjToString(v.Value)
  488. if text != "" {
  489. text = in.RegPreBac.Reg.ReplaceAllString(text, in.RegPreBac.Replace)
  490. }
  491. j.Result[key][k].Value = text
  492. exts = append(exts, map[string]interface{}{
  493. "field": v.Field,
  494. "key": v.Key,
  495. "type": v.Type,
  496. "matchtype": v.MatchType,
  497. "extfrom": v.ExtFrom,
  498. "value": text,
  499. })
  500. }
  501. extinfo[key] = exts
  502. }
  503. if len(extinfo) > 0 {
  504. AddExtLog("clear", j.SourceMid, j.Result, extinfo, in, t) //抽取日志
  505. }
  506. }
  507. }
  508. }
  509. //获取抽取结果map[string][]interface{},lua脚本使用
  510. func getResultMapForLua(j *ju.Job) map[string][]map[string]interface{} {
  511. result := map[string][]map[string]interface{}{}
  512. for key, val := range j.Result {
  513. if result[key] == nil {
  514. result[key] = []map[string]interface{}{}
  515. }
  516. for _, v := range val {
  517. tmp := map[string]interface{}{
  518. "field": v.Field,
  519. "value": v.Value,
  520. "type": v.Type,
  521. "matchtype": v.MatchType,
  522. "key": v.Key,
  523. "extfrom": v.ExtFrom,
  524. }
  525. result[key] = append(result[key], tmp)
  526. }
  527. }
  528. return result
  529. }
  530. //抽取日志
  531. func AddExtLog(ftype, sid string, before interface{}, extinfo interface{}, v *RegLuaInfo, t *TaskInfo) {
  532. if !t.IsEtxLog {
  533. return
  534. }
  535. logdata := map[string]interface{}{
  536. "code": v.Code,
  537. "name": v.Name,
  538. "type": ftype,
  539. "ruletext": v.RuleText,
  540. "islua": v.IsLua,
  541. "field": v.Field,
  542. "version": t.Version,
  543. "taskname": t.Name,
  544. "before": before,
  545. "extinfo": extinfo,
  546. "sid": sid,
  547. "comeintime": time.Now().Unix(),
  548. }
  549. lock.Lock()
  550. ExtLogs[t] = append(ExtLogs[t], logdata)
  551. lock.Unlock()
  552. }
  553. //保存抽取日志
  554. func SaveExtLog() {
  555. tmpLogs := map[*TaskInfo][]map[string]interface{}{}
  556. lock.Lock()
  557. tmpLogs = ExtLogs
  558. ExtLogs = map[*TaskInfo][]map[string]interface{}{}
  559. lock.Unlock()
  560. for k, v := range tmpLogs {
  561. if len(v) < saveLimit {
  562. db.Mgo.SaveBulk(k.TrackColl, v...)
  563. } else {
  564. for {
  565. if len(v) > saveLimit {
  566. tmp := v[:saveLimit]
  567. db.Mgo.SaveBulk(k.TrackColl, tmp...)
  568. v = v[saveLimit:]
  569. } else {
  570. db.Mgo.SaveBulk(k.TrackColl, v...)
  571. break
  572. }
  573. }
  574. }
  575. }
  576. time.AfterFunc(10*time.Second, SaveExtLog)
  577. }
  578. type FieldValue struct {
  579. Value interface{}
  580. Count int
  581. }
  582. //分析抽取结果并保存
  583. func AnalysisSaveResult(doc *map[string]interface{}, result map[string][]*ju.ExtField, task *TaskInfo) {
  584. _id := qu.BsonIdToSId((*doc)["_id"])
  585. //结果排序
  586. values := map[string][]*ju.SortObject{}
  587. for key, val := range result {
  588. fieldValue := map[string]int{}
  589. for _, v := range val {
  590. value := qu.ObjToString(v.Value)
  591. fieldValue[value] += 1
  592. }
  593. objects := []*ju.SortObject{}
  594. for k, v := range fieldValue {
  595. tmp := &ju.SortObject{
  596. Key: k,
  597. Value: v,
  598. }
  599. objects = append(objects, tmp)
  600. }
  601. values[key] = ju.ExtSort(objects)
  602. }
  603. //从排序结果中取值
  604. tmp := map[string]interface{}{}
  605. for key, val := range values {
  606. for _, v := range val { //取第一个
  607. if v.Key != "" {
  608. tmp[key] = v.Key
  609. break
  610. }
  611. }
  612. }
  613. //保存抽取结果
  614. task.DB.Update(task.SaveColl, `{"_id":"`+_id+`"}`, doc, true, false)
  615. //保存抽取详情
  616. tmp["result"] = result
  617. db.Mgo.Update("extract_result", `{"_id":"`+_id+`"}`, tmp, true, false)
  618. }