spider.go 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996
  1. package front
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "log"
  7. mu "mfw/util"
  8. "mongodb"
  9. qu "qfw/util"
  10. "sort"
  11. "spider"
  12. sp "spiderutil"
  13. "strconv"
  14. "strings"
  15. "time"
  16. u "util"
  17. )
// Base carries the basic settings of a spider as submitted by the edit form.
// SaveStep packs every field except SpiderCodeOld, in declaration order, into
// the "param_common" array of the stored luaconfig document.
type Base struct {
	// SpiderCode is the spider code; stored as param_common[0] and as "code".
	SpiderCode string
	// SpiderCodeOld is not read anywhere in the visible part of this file.
	SpiderCodeOld string
	// SpiderName is stored as param_common[1].
	SpiderName string
	// SpiderChannel is stored as param_common[2] and as "channel"; SaveStep
	// reports an error when it is empty.
	SpiderChannel string
	// SpiderDownDetailPage is stored as param_common[3].
	SpiderDownDetailPage bool
	// SpiderStartPage is stored as param_common[4]; SaveStep overwrites that
	// slot with 1 before saving.
	SpiderStartPage int
	// SpiderMaxPage is stored as param_common[5]; SaveStep adds a warning
	// when it equals 1.
	SpiderMaxPage int
	// SpiderRunRate is stored as param_common[6].
	SpiderRunRate int
	// Spider2Collection is stored as param_common[7].
	Spider2Collection string
	// SpiderPageEncoding is stored as param_common[8].
	SpiderPageEncoding string
	// SpiderStoreMode is stored as param_common[9]; documented values are 1 and 2
	// (their meaning is not visible here).
	SpiderStoreMode int
	// SpiderStoreToMsgEvent is stored as param_common[10].
	SpiderStoreToMsgEvent int
	// SpiderTargetChannelUrl is stored as param_common[11] and as "href";
	// SaveStep reports an error when it is empty.
	SpiderTargetChannelUrl string
	// SpiderLastDownloadTime is stored as param_common[12].
	SpiderLastDownloadTime string
	// SpiderIsHistoricalMend is stored as param_common[13].
	SpiderIsHistoricalMend bool
	// SpiderIsMustDownload is stored as param_common[14].
	SpiderIsMustDownload bool
}
// Step1 holds the form values of the first editing step. SaveStep stores
// DateFormat, Address and ContentChooser (in that order) as "param_time".
type Step1 struct {
	// Address is stored as param_time[1].
	Address string
	// ContentChooser is stored as param_time[2].
	ContentChooser string
	// DateFormat is stored as param_time[0].
	DateFormat string
	// Expert is the expert-mode script text, stored as "str_time".
	Expert string
	// Types is the editing mode of this step, stored as "type_time".
	Types int
}
// Step2 holds the form values of the list-page step. SaveStep stores the
// first seven fields, in declaration order, as "param_list".
type Step2 struct {
	// Listadd is stored as param_list[0].
	Listadd string
	// Listadds is stored as param_list[1].
	Listadds string
	// BlockChooser is stored as param_list[2].
	BlockChooser string
	// AddressChooser is stored as param_list[3].
	AddressChooser string
	// TitleChooser is stored as param_list[4].
	TitleChooser string
	// DateChooser is stored as param_list[5].
	DateChooser string
	// DateFormat is stored as param_list[6].
	DateFormat string
	// Expert is the expert-mode list script, stored as "str_list".
	Expert string
	// Types is the editing mode, stored as "type_list". SaveStep refuses to
	// save when it is 2 (chrome mode); LuaTextCheck reports 0 as
	// "not expert mode" and runs its list checks when it is 1.
	Types int
	// chromedp related fields.

	// Chrome is a JSON string stored verbatim as "str_list_chrome" and decoded
	// into []sp.ChromeActions for "param_list_chrome".
	Chrome string
	// RangeChrome is a JSON string stored verbatim as "str_list_rangechrome"
	// and decoded into []sp.ChromeActions for "param_list_rangechrome".
	RangeChrome string
	// RangeTimes is stored as "param_list_rangetimes".
	RangeTimes int
}
// Step3 holds the form values of the detail-page (third level) step.
// SaveStep stores ContentChooser and ElementChooser as "param_content".
type Step3 struct {
	// ContentChooser is stored as param_content[0].
	ContentChooser string
	// ElementChooser is stored as param_content[1].
	ElementChooser string
	// T_title is stored as "Ttitle".
	T_title string
	// T_href is stored as "Thref".
	T_href string
	// T_date is stored as "Tdate".
	T_date string
	// Expert is the expert-mode detail script, stored as "str_content".
	Expert string
	// Types is the editing mode, stored as "type_content". SaveStep refuses
	// to save when it is 2 (chrome mode).
	Types int
	// Chrome is a JSON string stored verbatim as "str_content_chrome" and
	// decoded into []sp.ChromeActions for "param_content_chrome".
	Chrome string
}
// StepRe3 holds the form values for the detail-page copy option.
type StepRe3 struct {
	// Checked is stored by SaveStep as "iscopycontent".
	Checked bool
	// Expert is stored by SaveStep as "str_recontent".
	Expert string
}
// OtherBase holds additional spider attributes submitted with the edit form.
type OtherBase struct {
	// IsFlow flags whether the data collected by the spider takes part in the
	// data flow; stored as "isflow".
	IsFlow int
	// SpiderType is the spider type: "increment" (incremental) or "history";
	// stored as "spidertype".
	SpiderType string
	// SpiderHistoryMaxPage is the maximum page to collect when gathering
	// historical data; stored as "spiderhistorymaxpage".
	SpiderHistoryMaxPage int
	// SpiderMoveEvent was documented as the node the spider moves to after the
	// history run (comm: queue mode, bid: high-performance mode). SaveStep now
	// also parses it with strconv.Atoi as the increment node and looks it up
	// in sp.Config.Uploadevents to fill "spidermovevent".
	SpiderMoveEvent string
}
  78. // 加载某个爬虫
  79. func (f *Front) LoadSpider(codeTaskIdReState string) error {
  80. tmpStr := strings.Split(codeTaskIdReState, "__")
  81. code := tmpStr[0]
  82. text := tmpStr[1]
  83. auth := qu.IntAll(f.GetSession("auth"))
  84. restate := -1
  85. if text == "1" { //重采编辑
  86. restate = 1
  87. } else if text == "2" {
  88. restate = 2
  89. } else if text == "3" {
  90. restate = 3
  91. } else if text != "bu" {
  92. if auth == u.Role_Dev && qu.ObjToString(f.GetSession(text)) == "" {
  93. xgTime := time.Unix(time.Now().Unix(), 0).Format("2006-01-02 15:04:05")
  94. f.SetSession(text, xgTime)
  95. }
  96. }
  97. copy := f.GetString("copy")
  98. if f.Method() == "GET" {
  99. code := sp.Se.Decode4Hex(code)
  100. f.T["actiontext"] = "编辑"
  101. //lua, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
  102. lua, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
  103. //爬虫开发人员编辑爬虫,修改爬虫信息
  104. if auth == u.Role_Dev && text != "bu" && qu.ObjToString((*lua)["spidertype"]) == "increment" {
  105. (*lua)["spidertype"] = "history"
  106. (*lua)["incrementevent"] = (*lua)["event"]
  107. }
  108. if qu.ObjToString((*lua)["createuserid"]) == f.GetSession("userid").(string) || auth >= 1 {
  109. if len(*lua) > 0 {
  110. if qu.IntAll((*lua)["event"]) == 7000 && qu.IntAll((*lua)["urgency"]) == 0 && qu.IntAll((*lua)["state"]) == 0 {
  111. q := map[string]interface{}{
  112. "event": 7000,
  113. "state": 0,
  114. "urgency": 1,
  115. "modifyuserid": f.GetSession("userid"),
  116. }
  117. if u.MgoEB.Count("luaconfig", q) > 0 {
  118. f.Write("名下还有7000节点待完成的紧急爬虫,暂无法处理该爬虫!")
  119. return nil
  120. }
  121. }
  122. if copy != "" {
  123. //luacopy, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": copy})
  124. luacopy, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": copy})
  125. if len(*luacopy) > 0 {
  126. (*lua)["model"] = (*luacopy)["model"]
  127. common_copy := (*luacopy)["param_common"].([]interface{})
  128. common := (*lua)["param_common"].([]interface{})
  129. common_copy[0] = common[0]
  130. common_copy[1] = common[1]
  131. common_copy[2] = common[2]
  132. common_copy[11] = common[11]
  133. (*lua)["param_common"] = (*luacopy)["param_common"]
  134. (*lua)["param_time"] = (*luacopy)["param_time"]
  135. (*lua)["param_list"] = (*luacopy)["param_list"]
  136. (*lua)["param_content"] = (*luacopy)["param_content"]
  137. (*lua)["str_list"] = (*luacopy)["str_list"]
  138. (*lua)["str_time"] = (*luacopy)["str_time"]
  139. (*lua)["str_content"] = (*luacopy)["str_content"]
  140. (*lua)["Thref"] = (*luacopy)["Thref"]
  141. (*lua)["Tpublishtime"] = (*luacopy)["Tpublishtime"]
  142. (*lua)["Ttitle"] = (*luacopy)["Ttitle"]
  143. (*lua)["Tdate"] = (*luacopy)["Tdate"]
  144. (*lua)["type_content"] = (*luacopy)["type_content"]
  145. (*lua)["type_list"] = (*luacopy)["type_list"]
  146. (*lua)["type_time"] = (*luacopy)["type_time"]
  147. }
  148. }
  149. if (*lua)["listcheck"] != nil {
  150. listcheck := (*lua)["listcheck"].(string)
  151. listcheck = strings.Replace(listcheck, "\\n", "\n", -1)
  152. listcheck = strings.Replace(listcheck, "\\", "", -1)
  153. (*lua)["listcheck"] = listcheck
  154. }
  155. if (*lua)["contentcheck"] != nil {
  156. contentcheck := (*lua)["contentcheck"].(string)
  157. contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1)
  158. contentcheck = strings.Replace(contentcheck, "\\", "", -1)
  159. (*lua)["contentcheck"] = contentcheck
  160. }
  161. js, _ := json.MarshalIndent((*lua)["model"], "", " ")
  162. (*lua)["js"] = string(js)
  163. f.T["lua"] = lua
  164. f.T["taskId"] = text
  165. f.T["restate"] = restate
  166. f.T["isflow"] = (*lua)["isflow"]
  167. f.T["spidertype"] = (*lua)["spidertype"]
  168. f.T["spidermovevent"] = (*lua)["spidermovevent"]
  169. f.T["spiderhistorymaxpage"] = (*lua)["spiderhistorymaxpage"]
  170. f.T["spiderremark"] = (*lua)["spiderremark"]
  171. f.T["identity"] = f.GetSession("identity")
  172. events := []string{}
  173. for k, _ := range sp.Config.Uploadevents {
  174. events = append(events, k)
  175. }
  176. sort.Strings(events)
  177. f.T["events"] = events
  178. if (*lua)["oldlua"] != nil {
  179. return f.Render("oldedit.html", &f.T)
  180. }
  181. f.T["areas"] = u.Area //
  182. f.T["citys"] = u.City //
  183. f.T["provinces"] = u.Province //
  184. //查询爬虫是否有列表页处理中的异常任务
  185. f.T["clearchannel"] = u.MgoEB.Count("task", map[string]interface{}{"s_code": code, "s_type": "1", "i_state": 2}) > 0
  186. return f.Render("spideredit.html", &f.T)
  187. }
  188. } else {
  189. f.Write("您没有编辑他人脚本的权限")
  190. }
  191. }
  192. return nil
  193. }
  194. // 查看某个爬虫
  195. func (f *Front) ViewSpider(id string) error {
  196. auth := qu.IntAll(f.GetSession("auth"))
  197. if auth >= 1 {
  198. if f.Method() == "GET" {
  199. code := sp.Se.Decode4Hex(id)
  200. f.T["actiontext"] = "编辑"
  201. //lua, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
  202. lua, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
  203. if len(*lua) > 0 {
  204. if (*lua)["listcheck"] != nil {
  205. listcheck := (*lua)["listcheck"].(string)
  206. listcheck = strings.Replace(listcheck, "\\n", "\n", -1)
  207. listcheck = strings.Replace(listcheck, "\\", "", -1)
  208. (*lua)["listcheck"] = listcheck
  209. }
  210. if (*lua)["contentcheck"] != nil {
  211. contentcheck := (*lua)["contentcheck"].(string)
  212. contentcheck = strings.Replace(contentcheck, "\\n", "\n", -1)
  213. contentcheck = strings.Replace(contentcheck, "\\", "", -1)
  214. (*lua)["contentcheck"] = contentcheck
  215. }
  216. js, _ := json.MarshalIndent((*lua)["model"], "", " ")
  217. (*lua)["js"] = string(js)
  218. f.T["lua"] = lua
  219. f.T["isflow"] = (*lua)["isflow"]
  220. f.T["spidertype"] = (*lua)["spidertype"]
  221. f.T["spidermovevent"] = (*lua)["spidermovevent"]
  222. f.T["spiderhistorymaxpage"] = (*lua)["spiderhistorymaxpage"]
  223. events := []string{}
  224. for k, _ := range sp.Config.Uploadevents {
  225. events = append(events, k)
  226. }
  227. sort.Strings(events)
  228. f.T["events"] = events
  229. if (*lua)["oldlua"] != nil {
  230. return f.Render("oldedit.html", &f.T)
  231. }
  232. f.T["areas"] = u.Area //
  233. f.T["citys"] = u.City //
  234. f.T["provinces"] = u.Province //
  235. f.T["spiderremark"] = (*lua)["spiderremark"]
  236. return f.Render("spiderview.html", &f.T)
  237. } else {
  238. f.Write("没有对应记录!")
  239. return nil
  240. }
  241. }
  242. return f.Redirect("/center")
  243. } else {
  244. f.Write("您没有查看他人脚本的权限")
  245. return nil
  246. }
  247. }
  248. func (f *Front) LoadModel(id string) error {
  249. if f.Method() == "GET" {
  250. //lua, _ := u.MgoE.Find("luaconfig", map[string]interface{}{"code": id}, nil, map[string]interface{}{"model": 1}, true, -1, -1)
  251. lua, _ := u.MgoEB.Find("luaconfig", map[string]interface{}{"code": id}, nil, map[string]interface{}{"model": 1}, true, -1, -1)
  252. if len(*lua) > 0 {
  253. f.ServeJson((*lua)[0])
  254. }
  255. }
  256. return f.Redirect("/center")
  257. }
// SaveStep saves the spider being edited and answers with a JSON object
// ("msg", "ok", "code").
//
// Two paths exist:
//   - "oldlua" form value set: only the raw script ("luacontent") of an
//     existing record is updated, and the script must still contain the
//     record's code.
//   - otherwise: the form structs (Base, Step1..Step3, StepRe3, OtherBase)
//     are assembled into a parameter map, validated with LuaTextCheck and
//     stored through spider.SaveSpider.
//
// NOTE(review): several paths return without writing any response: a failed
// update in the oldlua path, and an empty SpiderName or SpiderCode in the
// normal path. Confirm whether the front end relies on that.
func (f *Front) SaveStep() {
	rep := map[string]interface{}{}
	if f.Step2.Types == 2 || f.Step3.Types == 2 { // chrome mode can only be saved after conversion to expert mode
		rep["msg"] = "爬虫不支持chrome模式保存"
		f.ServeJson(rep)
		return
	}
	userid, _ := f.GetSession("userid").(string)
	auth := qu.IntAll(f.GetSession("auth"))
	if f.GetString("oldlua") != "" {
		id := f.GetString("code")
		//one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": id})
		one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": id})
		id = qu.ObjToString((*one)["code"])
		script := f.GetStringComm("script")
		// The script has to mention the stored code, otherwise it is rejected.
		if strings.Index(script, id) == -1 {
			rep["msg"] = "code/名称都不能更改"
			f.ServeJson(rep)
			return
		} else {
			upset := map[string]interface{}{"luacontent": script}
			upset["modifytime"] = time.Now().Unix()
			//b := u.MgoE.Update("luaconfig", map[string]interface{}{"code": id}, map[string]interface{}{"$set": upset}, true, false)
			b := u.MgoEB.Update("luaconfig", map[string]interface{}{"code": id}, map[string]interface{}{"$set": upset}, true, false)
			if b {
				rep["msg"] = "保存成功"
				rep["code"] = sp.Se.Encode2Hex(id)
				f.ServeJson(rep)
				return
			}
		}
	} else {
		if f.Base.SpiderName != "" && f.Base.SpiderCode != "" {
			code := f.Base.SpiderCode
			//one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": f.Base.SpiderCode})
			one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": f.Base.SpiderCode})
			// Record the spider's history at the first save before a publish operation.
			// NOTE(review): unchecked assertion, panics when "username" is absent from the session.
			user := f.GetSession("username").(string)
			LuaSaveLog(f.Base.SpiderCode, user, one, 0)
			state := qu.IntAllDef((*one)["state"], 0)
			restate := qu.IntAll((*one)["restate"])
			infoformat := qu.IntAll((*one)["infoformat"])
			comeintime := time.Now().Unix()
			if len((*one)) > 0 {
				// Existing spider: keep its creation time and enforce ownership for developers.
				comeintime = qu.Int64All((*one)["comeintime"])
				ouserid := qu.ObjToString((*one)["createuserid"])
				if ouserid != userid && auth == u.Role_Dev {
					f.Write("权限不够,不能修改他人脚本")
					return
				} else {
					// Code and name always come from the stored record, not from the form.
					code = qu.ObjToString((*one)["code"])
					f.Base.SpiderCode = code
					f.Base.SpiderName = ((*one)["param_common"].([]interface{}))[1].(string)
				}
			} else {
				// New spider: only administrators may create one.
				if auth != u.Role_Admin {
					f.Write("不能新建爬虫,请联系管理员导入")
					return
				}
			}
			listcheck := f.GetString("listcheck")
			contentcheck := f.GetString("contentcheck")
			if auth == u.Role_Dev {
				//f.Base.SpiderStoreToMsgEvent = 4002
			}
			// Positional layout of "param_common"; LoadSpider relies on indexes 0, 1, 2 and 11.
			common := []interface{}{
				f.Base.SpiderCode,
				f.Base.SpiderName,
				f.Base.SpiderChannel,
				f.Base.SpiderDownDetailPage,
				f.Base.SpiderStartPage,
				f.Base.SpiderMaxPage,
				f.Base.SpiderRunRate,
				f.Base.Spider2Collection,
				f.Base.SpiderPageEncoding,
				f.Base.SpiderStoreMode,
				f.Base.SpiderStoreToMsgEvent,
				f.Base.SpiderTargetChannelUrl,
				f.Base.SpiderLastDownloadTime,
				f.Base.SpiderIsHistoricalMend,
				f.Base.SpiderIsMustDownload,
			}
			ptime := []interface{}{
				f.Step1.DateFormat,
				f.Step1.Address,
				f.Step1.ContentChooser,
			}
			list := []interface{}{
				f.Step2.Listadd,
				f.Step2.Listadds,
				f.Step2.BlockChooser,
				f.Step2.AddressChooser,
				f.Step2.TitleChooser,
				f.Step2.DateChooser,
				f.Step2.DateFormat,
			}
			content := []interface{}{
				f.Step3.ContentChooser,
				f.Step3.ElementChooser,
			}
			// Decode the chrome action lists; decode errors are ignored and leave the slices empty.
			param_list_chrome, param_list_rangechrome, param_content_chrome := []sp.ChromeActions{}, []sp.ChromeActions{}, []sp.ChromeActions{}
			json.Unmarshal([]byte(f.Step2.Chrome), &param_list_chrome)
			json.Unmarshal([]byte(f.Step2.RangeChrome), &param_list_rangechrome)
			json.Unmarshal([]byte(f.Step3.Chrome), &param_content_chrome)
			param := map[string]interface{}{}
			// The start page slot is always forced to 1, whatever the form sent.
			common[4] = 1
			param["param_common"] = common
			param["channel"] = f.Base.SpiderChannel
			param["href"] = f.Base.SpiderTargetChannelUrl
			// wizard mode
			param["param_time"] = ptime
			param["param_list"] = list
			param["param_list_chrome"] = param_list_chrome
			param["param_list_rangechrome"] = param_list_rangechrome
			param["param_list_rangetimes"] = f.Step2.RangeTimes
			param["param_content_chrome"] = param_content_chrome
			param["param_content"] = content
			param["type_time"] = f.Step1.Types
			param["type_list"] = f.Step2.Types
			param["type_content"] = f.Step3.Types
			// expert mode
			param["str_time"] = f.Step1.Expert
			param["str_list"] = f.Step2.Expert
			param["str_list_chrome"] = f.Step2.Chrome
			param["str_list_rangechrome"] = f.Step2.RangeChrome
			param["str_content_chrome"] = f.Step3.Chrome
			param["str_content"] = f.Step3.Expert
			param["comeintime"] = comeintime
			// Escape newlines and double quotes in the check scripts; LoadSpider
			// and ViewSpider reverse this when loading.
			listcheck = strings.Replace(listcheck, "\n", "\\\\n", -1)
			param["listcheck"] = strings.Replace(listcheck, "\"", "\\\\\"", -1)
			contentcheck = strings.Replace(contentcheck, "\n", "\\\\n", -1)
			param["contentcheck"] = strings.Replace(contentcheck, "\"", "\\\\\"", -1)
			// supplementary model: one form value per key configured for the chosen model
			s_model := f.GetString("model")
			configModel := sp.Config.Model[s_model]
			model := map[string]interface{}{}
			for k, _ := range configModel {
				model[k] = f.GetString(k)
			}
			model["model"] = s_model
			param["code"] = f.Base.SpiderCode
			param["model"] = model
			if len((*one)) > 0 {
				param["createuser"] = (*one)["createuser"]
				param["createuserid"] = (*one)["createuserid"]
				param["code"] = (*one)["code"]
				// a developer's task-linked edit changes the spider state
				state = qu.IntAll((*one)["state"])
				// Original note: edited by a developer after review approval (not
				// including already published) -> reset to pending. The
				// restate != 1 test keeps the state unchanged when saving a
				// re-collect edit.
				if auth == u.Role_Dev && state >= Sp_state_3 && restate != 1 {
					param["state"] = 0
				} else {
					param["state"] = state
				}
			} else {
				param["createuser"] = f.GetSession("loginuser")
				param["createuserid"] = f.GetSession("userid")
				param["createuseremail"] = f.GetSession("email")
				param["next"] = f.GetSession("email")
				param["state"] = 0
			}
			// Without a previous modifier the creator is recorded as modifier.
			if qu.ObjToString((*one)["modifyuser"]) == "" {
				param["modifyuser"] = param["createuser"]
				param["modifyuserid"] = param["createuserid"]
			}
			param["modifytime"] = time.Now().Unix()
			param["Ttitle"] = f.Step3.T_title
			param["Thref"] = f.Step3.T_href
			param["Tdate"] = f.Step3.T_date
			// other information
			param["isflow"] = f.OtherBase.IsFlow
			param["spidertype"] = f.OtherBase.SpiderType
			param["spiderhistorymaxpage"] = f.OtherBase.SpiderHistoryMaxPage
			// Here SpiderMoveEvent no longer means comm/bid; it denotes the increment node.
			tmpEvent, err := strconv.Atoi(f.OtherBase.SpiderMoveEvent)
			// msgResult collects "warn" and "err" texts; LuaTextCheck receives the same map.
			msgResult := map[string]string{}
			if f.Base.SpiderMaxPage == 1 {
				msgResult["warn"] = "提醒,增量采集页过小,请再次核对!"
			}
			// other validation
			if f.Base.SpiderChannel == "" {
				msgResult["err"] = "栏目名称为空!"
			}
			if f.Base.SpiderTargetChannelUrl == "" {
				msgResult["err"] = "栏目地址为空!;" + msgResult["err"]
			}
			if f.OtherBase.SpiderType == "history" { // history spiders are put on node 7000 and the previous node is recorded
				var historyMaxPageWarn string
				if f.OtherBase.SpiderHistoryMaxPage > 500 { // more than 500 history pages: suggest creating a copy spider
					historyMaxPageWarn = "提醒,历史页过大,考虑复制爬虫!"
				} else if f.OtherBase.SpiderHistoryMaxPage == 1 {
					historyMaxPageWarn = "提醒,历史页过小,请再次核对!"
				}
				if historyMaxPageWarn != "" {
					msgResult["warn"] = historyMaxPageWarn + ";" + msgResult["warn"]
				}
				param["event"] = 7000
				if err == nil {
					param["incrementevent"] = tmpEvent // the developer switched the increment node
				} else if event := qu.IntAll((*one)["event"]); event != 7000 { // default increment node
					param["incrementevent"] = event
				}
				// Saving to 7000 marks the spider as urgent (on node 7000, normal
				// spiders cannot be worked on while urgent ones are unfinished).
				param["urgency"] = 1
				if (*one)["downevent"] == nil {
					param["downevent"] = qu.IntAll((*one)["event"]) // current node
				}
			} else if f.OtherBase.SpiderType == "increment" && err == nil { // incremental
				param["event"] = tmpEvent // the developer switched the increment node
				// After a developer changes the node, the reviewer has to unpublish
				// the spider from its previous node when publishing; that node is
				// remembered temporarily as downevent.
				if event := qu.IntAll((*one)["event"]); event != tmpEvent && (*one)["downevent"] == nil {
					param["downevent"] = event
				}
			}
			if movevent, ok := sp.Config.Uploadevents[f.OtherBase.SpiderMoveEvent].(string); ok && movevent != "" {
				param["spidermovevent"] = movevent
			}
			// detail-page copy
			param["str_recontent"] = f.StepRe3.Expert
			param["iscopycontent"] = f.StepRe3.Checked
			//
			param["listisfilter"] = u.ListFilterReg.MatchString(f.Step2.Expert) // whether the list script matches the keyword-filter pattern
			param["projecthref"] = ProjectHrefReg.MatchString(f.Step2.Expert) || ProjectHrefReg.MatchString(f.Step3.Expert)
			// checkLua is treated as "validation failed": the spider is saved only when it is false.
			checkLua := LuaTextCheck(infoformat, param, param_list_chrome, param_list_rangechrome, param_content_chrome, msgResult)
			//checkLua := LuaTextCheck(f.Base.SpiderCode, f.Step2.Expert, f.Step3.Expert, f.Step2.Types, infoformat, model, msgResult)
			if !checkLua {
				issave := spider.SaveSpider(code, param) // save the script
				if issave {
					// Fill param with the stored fields it does not set itself, so
					// that the log entry below describes the full record.
					for k, v := range *one {
						if k != "_id" && param[k] == nil {
							param[k] = v
						}
					}
					Wlog(f.Base.SpiderName, f.Base.SpiderCode, user, f.GetSession("userid").(string), "修改", param)
					// "msg" is only set when there is a warning to show.
					if warn := msgResult["warn"]; warn != "" {
						rep["msg"] = "保存成功;" + msgResult["warn"]
					}
					rep["ok"] = true
				} else {
					rep["msg"] = "保存失败"
					rep["ok"] = false
				}
			} else {
				rep["msg"] = "保存失败," + msgResult["err"]
				rep["ok"] = false
			}
			rep["code"] = sp.Se.Encode2Hex(code)
			f.ServeJson(rep)
		}
	}
}
  506. func (f *Front) SaveJs() {
  507. //param_type := f.GetString("param_type")
  508. //return_type := f.GetString("return_type")
  509. js_name := f.GetString("js_name")
  510. js_alias := f.GetString("js_alias")
  511. jstext := f.GetString("jstext")
  512. step := f.GetString("step")
  513. code := f.GetString("code")
  514. update := map[string]interface{}{
  515. "runjs": true,
  516. }
  517. js := map[string]interface{}{
  518. "js_text": jstext,
  519. "js_name": js_name,
  520. "js_step": step,
  521. "js_alias": js_alias,
  522. "js_param": "string",
  523. "js_return": "string",
  524. }
  525. if step == "list" {
  526. update["js_list"] = js
  527. } else if step == "detail" {
  528. update["js_detail"] = js
  529. }
  530. u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": update}, false, false)
  531. f.ServeJson(map[string]interface{}{"ok": true})
  532. }
  533. // LuaCheckSaveLog 保存机检日志
  534. func LuaCheckSaveLog(lua *map[string]interface{}) {
  535. u.MgoEB.Save("luachecklog", map[string]interface{}{
  536. "code": (*lua)["code"],
  537. "check": (*lua)["check"],
  538. "checkok": (*lua)["checkok"],
  539. "checktime": (*lua)["checktime"],
  540. "comeintime": time.Now().Unix(),
  541. })
  542. }
  543. func UpdateSiteTask(code string) {
  544. query := map[string]interface{}{
  545. "b_repair": false,
  546. "s_spidercode": code,
  547. }
  548. set := map[string]interface{}{
  549. "b_repair": true,
  550. "l_update_time": time.Now().Unix(),
  551. }
  552. u.MgoEB.Update("spider_important_warning", query, map[string]interface{}{"$set": set}, false, true)
  553. }
  554. func LuaSaveLog(code, user string, data *map[string]interface{}, stype int) {
  555. saveOne, _ := u.MgoEB.FindOne("luasavelog", map[string]interface{}{"state": 0, "code": code})
  556. if stype == 0 { //保存记录
  557. if len(*saveOne) == 0 && len(*data) > 0 { //重新记录
  558. delete(*data, "_id")
  559. save := map[string]interface{}{
  560. "code": code,
  561. "state": 0,
  562. "saveuser": user,
  563. "comeintime": time.Now().Unix(),
  564. "luaold": data,
  565. }
  566. u.MgoEB.Save("luasavelog", save)
  567. }
  568. } else if stype == 1 { //对比
  569. if len(*saveOne) > 0 {
  570. tmp := (*saveOne)["luaold"].(map[string]interface{})
  571. updateMap := map[string]interface{}{} //记录字段改变值
  572. for k, v := range *data {
  573. if k != "_id" && k != "state" && k != "modifytime" {
  574. if tmpV := tmp[k]; tmpV != nil { //历史记录存在字段
  575. tmpJson, _ := json.Marshal(tmpV)
  576. dataJson, _ := json.Marshal(v)
  577. if string(tmpJson) != string(dataJson) {
  578. updateMap[k] = v
  579. }
  580. delete(tmp, k) //删除对比过的字段
  581. } else { //历史记录不存在字段
  582. updateMap[k] = v
  583. }
  584. }
  585. }
  586. if len(tmp) > 0 {
  587. for k, _ := range tmp { //上架时爬虫较历史爬虫少的字段信息
  588. updateMap[k] = nil
  589. }
  590. }
  591. set := map[string]interface{}{"state": 1, "updatetime": time.Now().Unix(), "updateuser": user}
  592. if len(updateMap) > 0 { //有字段改变
  593. set["luaupdate"] = updateMap
  594. set["lusnew"] = data
  595. }
  596. u.MgoEB.UpdateById("luasavelog", (*saveOne)["_id"], map[string]interface{}{"$set": set})
  597. }
  598. }
  599. }
  600. /*
  601. 爬虫保存时,检查列表页和三级页代码中是否含lua原生方法
  602. func LuaTextCheck(code, list, detail string, type_list, infoformat int, model map[string]interface{}, msgResult map[string]string) bool {
  603. defer qu.Catch()
  604. //1、异常校验
  605. var errmsg, warnmsg string
  606. if LuaReg.MatchString(list) || LuaReg.MatchString(detail) {
  607. errmsg += "代码中含有lua原生方法;"
  608. }
  609. if ListFilterReg.MatchString(detail) && !strings.Contains(detail, "delete") { //三级页含过滤但是没有data["delete"]="true"
  610. errmsg += `三级页缺少data["delete"]="true";`
  611. }
  612. sln_reg := regexp.MustCompile(`sendListNum\(pageno,list\)`)
  613. slnIndexArr := sln_reg.FindAllStringIndex(list, -1)
  614. if type_list != 0 && len(slnIndexArr) == 0 { //列表页专家模式且不含sendListNum
  615. errmsg += "代码中缺少sendListNum(pageno,list)方法;"
  616. } else if type_list == 1 && len(slnIndexArr) > 0 { //判断sendListNum方法的位置
  617. trim_reg := regexp.MustCompile("trim")
  618. insert_reg := regexp.MustCompile("insert")
  619. trIndexArr := trim_reg.FindAllStringIndex(list, -1)
  620. irIndexArr := insert_reg.FindAllStringIndex(list, -1)
  621. slIndex := slnIndexArr[len(slnIndexArr)-1] //sendListNum位置
  622. trIndex := trIndexArr[len(trIndexArr)-1] //com.trim位置
  623. irIndex := irIndexArr[len(irIndexArr)-1] //insert位置
  624. qu.Debug("sendListNum位置:", trIndex, slIndex, irIndex)
  625. if slIndex[1] < trIndex[0] || slIndex[0] > irIndex[1] { //sendListNum方法必须在com.trim方法后,table.insert方法前
  626. errmsg += "sendListNum方法位置错误;"
  627. }
  628. }
  629. if type_list == 1 {
  630. //校验列表页area、city、distric
  631. if !strings.Contains(list, "area") {
  632. errmsg += `模板item["area"]不存在;`
  633. }
  634. if !strings.Contains(list, "city") {
  635. errmsg += `模板item["city"]不存在;`
  636. }
  637. if !strings.Contains(list, "district") {
  638. errmsg += `模板item["district"]不存在;`
  639. }
  640. area := qu.ObjToString(model["area"])
  641. city := qu.ObjToString(model["city"])
  642. district := qu.ObjToString(model["district"])
  643. if area != "" && !strings.Contains(list, area) {
  644. errmsg += "省份信息与模板不一致;"
  645. }
  646. if city != "" && !strings.Contains(list, city) {
  647. errmsg += "城市信息与模板不一致;"
  648. }
  649. if district != "" && !strings.Contains(list, district) {
  650. errmsg += "区/县信息与模板不一致;"
  651. }
  652. if infoformat == 2 && !strings.Contains(detail, "projectname") {
  653. errmsg += "拟建/审批数据缺少projectname字段;"
  654. }
  655. //校验爬虫代码的一致性
  656. if !strings.Contains(list, code) {
  657. errmsg += `模板item["spidercode"]值错误;`
  658. }
  659. }
  660. //2、提醒校验
  661. if !strings.Contains(detail, "downloadFile") && !strings.Contains(detail, "getFileAttachmentsArrayWithTag") {
  662. warnmsg += "三级页缺少下载附件方法;"
  663. }
  664. msgResult["warn"] += warnmsg
  665. msgResult["err"] = errmsg
  666. return errmsg != ""
  667. }
  668. */
  669. func LuaTextCheck(infoformat int, param map[string]interface{}, param_list_chrome, param_list_rangechrome, param_content_chrome []sp.ChromeActions, msgResult map[string]string) bool {
  670. defer qu.Catch()
  671. list := qu.ObjToString(param["str_list"])
  672. detail := qu.ObjToString(param["str_content"])
  673. type_list := qu.IntAll(param["type_list"])
  674. type_content := qu.IntAll(param["type_content"])
  675. model, _ := param["model"].(map[string]interface{})
  676. var errmsg, warnmsg string
  677. if qu.IntAll(param["type_list"]) == 0 {
  678. errmsg += "列表页非专家模式;"
  679. }
  680. if qu.IntAll(param["type_content"]) == 0 {
  681. errmsg += "详情页非专家模式;"
  682. }
  683. //1、异常校验
  684. if type_list == 1 {
  685. if u.LuaReg.MatchString(list) {
  686. errmsg += "列表页代码中含有lua原生方法;"
  687. }
  688. if strings.Contains(list, "downloadByChrome") { //chrome下载方法动作参数判断
  689. for _, act := range param_list_chrome {
  690. if act.Action != "changeip" && act.Param == "" {
  691. errmsg += "列表页chrome模式'" + act.Action + "'基础动作未填写参数,填写后注意Ctrl+F10重新插入代码;"
  692. break
  693. }
  694. }
  695. for _, act := range param_list_rangechrome {
  696. if act.Action != "changeip" && act.Param == "" {
  697. errmsg += "列表页chrome模式'" + act.Action + "'循环动作未填写参数,填写后注意Ctrl+F10重新插入代码;"
  698. break
  699. }
  700. }
  701. }
  702. if strings.Contains(list, `item["title"]="a"`) {
  703. if !strings.Contains(detail, `data["title"]`) {
  704. errmsg += "检查代码title的完整性;"
  705. }
  706. }
  707. if strings.Contains(list, "stringFind") && !strings.Contains(list, "--关键词过滤") {
  708. errmsg += "列表页代码有过滤方法stringFind但缺少注释:--关键词过滤;"
  709. }
  710. slnIndexArr := u.SendListNumReg.FindAllStringIndex(list, -1)
  711. if len(slnIndexArr) == 0 { //列表页专家模式且不含sendListNum
  712. errmsg += "代码中缺少sendListNum(pageno,list)方法;"
  713. } else if len(slnIndexArr) > 0 { //判断sendListNum方法的位置
  714. trIndexArr := u.TrimReg.FindAllStringIndex(list, -1)
  715. irIndexArr := u.InsertReg.FindAllStringIndex(list, -1)
  716. slIndex := slnIndexArr[len(slnIndexArr)-1] //sendListNum位置
  717. trIndex := trIndexArr[len(trIndexArr)-1] //com.trim位置
  718. irIndex := irIndexArr[len(irIndexArr)-1] //insert位置
  719. qu.Debug("sendListNum位置:", trIndex, slIndex, irIndex)
  720. if slIndex[1] < trIndex[0] || slIndex[0] > irIndex[1] { //sendListNum方法必须在com.trim方法后,table.insert方法前
  721. errmsg += "sendListNum方法位置错误;"
  722. }
  723. }
  724. if param, ok := param["param_common"].([]interface{}); ok && len(param) >= 3 {
  725. spidercode := qu.ObjToString(param[0])
  726. site := qu.ObjToString(param[1])
  727. channel := qu.ObjToString(param[2])
  728. if !strings.Contains(list, fmt.Sprintf(u.CheckText_Code, spidercode)) {
  729. errmsg += `爬虫代码的值与模板不一致;`
  730. }
  731. if !strings.Contains(list, fmt.Sprintf(u.CheckText_Site, site)) {
  732. errmsg += `站点的值与模板不一致;`
  733. }
  734. if !strings.Contains(list, fmt.Sprintf(u.CheckText_Channel, channel)) {
  735. warnmsg += `栏目的值与模板不一致;`
  736. }
  737. }
  738. //校验列表页area、city、distric
  739. area := qu.ObjToString(model["area"])
  740. city := qu.ObjToString(model["city"])
  741. district := qu.ObjToString(model["district"])
  742. if !strings.Contains(list, fmt.Sprintf(u.CheckText_Area+`="%s"`, area)) {
  743. errmsg += `省份信息与模板不一致`
  744. }
  745. if !strings.Contains(list, fmt.Sprintf(u.CheckText_City+`="%s"`, city)) {
  746. errmsg += `城市信息与模板不一致`
  747. }
  748. if !strings.Contains(list, fmt.Sprintf(u.CheckText_District+`="%s"`, district)) {
  749. errmsg += `区/县信息与模板不一致`
  750. }
  751. if infoformat == 2 && !strings.Contains(detail, "projectname") {
  752. errmsg += "拟建/审批数据缺少projectname字段;"
  753. }
  754. //校验爬虫代码的一致性
  755. //if !strings.Contains(list, code) {
  756. // errmsg += `模板item["spidercode"]值错误;`
  757. //}
  758. isHttps := false
  759. for _, text := range u.DomainNameReg.FindAllString(list, -1) {
  760. if strings.Contains(text, "https") {
  761. isHttps = true
  762. }
  763. }
  764. if isHttps {
  765. for tmpStr, tmpText := range map[string]string{"列表页": list, "三级页": detail} {
  766. downLoadText := u.DownLoadReg.FindString(tmpText)
  767. if downLoadText != "" {
  768. textArr := strings.Split(downLoadText, ",")
  769. if len(textArr) < 4 {
  770. errmsg += "download方法添加下载参数;"
  771. } else if len(textArr) == 4 {
  772. if !u.CodeTypeReg.MatchString(textArr[0]) || (textArr[1] != "true" && textArr[1] != "false") {
  773. errmsg += tmpStr + "download方法添加下载参数;"
  774. }
  775. }
  776. }
  777. }
  778. }
  779. }
  780. if type_content == 1 {
  781. if u.LuaReg.MatchString(detail) {
  782. errmsg += "详情页代码中含有lua原生方法;"
  783. }
  784. if u.ListFilterReg.MatchString(detail) && !strings.Contains(detail, "delete") { //三级页含过滤但是没有data["delete"]="true"
  785. warnmsg += `详情页缺少data["delete"]="true";`
  786. }
  787. if !strings.Contains(detail, "s_title") {
  788. errmsg += "详情页缺少s_title;"
  789. }
  790. if strings.Contains(detail, "downloadByChrome") { //chrome下载方法动作参数判断
  791. for _, act := range param_content_chrome {
  792. if act.Action != "changeip" && act.Param == "" {
  793. errmsg += "详情页chrome模式'" + act.Action + "'动作未填写参数,填写后注意Ctrl+F10重新插入代码;"
  794. }
  795. }
  796. }
  797. }
  798. //2、提醒校验
  799. if !strings.Contains(detail, "downloadFile") && !strings.Contains(detail, "getFileAttachmentsArrayWithTag") {
  800. warnmsg += "详情页缺少下载附件方法;"
  801. }
  802. msgResult["warn"] += warnmsg
  803. msgResult["err"] += errmsg
  804. return msgResult["err"] != ""
  805. }
  806. // 方法测试
  807. func (f *Front) RunStep() {
  808. imodal, _ := f.GetInteger("imodal")
  809. script, _ := f.GetBool("script")
  810. listcheck := f.GetString("listcheck")
  811. contentcheck := f.GetString("contentcheck")
  812. downloadnode := f.GetString("downloadnode") //下载节点
  813. common := []interface{}{
  814. f.Base.SpiderCode,
  815. f.Base.SpiderName,
  816. f.Base.SpiderChannel,
  817. f.Base.SpiderDownDetailPage,
  818. f.Base.SpiderStartPage,
  819. f.Base.SpiderMaxPage,
  820. f.Base.SpiderRunRate,
  821. f.Base.Spider2Collection,
  822. f.Base.SpiderPageEncoding,
  823. f.Base.SpiderStoreMode,
  824. f.Base.SpiderStoreToMsgEvent,
  825. f.Base.SpiderTargetChannelUrl,
  826. f.Base.SpiderLastDownloadTime,
  827. f.Base.SpiderIsHistoricalMend,
  828. f.Base.SpiderIsMustDownload,
  829. "",
  830. "",
  831. "",
  832. }
  833. if f.Method() == "POST" {
  834. switch f.GetString("step") {
  835. case "Step1": //publishtime
  836. ptime := []interface{}{
  837. f.Step1.DateFormat,
  838. f.Step1.Address,
  839. f.Step1.ContentChooser,
  840. }
  841. if script {
  842. _, scripts := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal, 1)
  843. f.ServeJson(scripts)
  844. return
  845. }
  846. rs, err := spider.GetLastPublishTime(common, ptime, f.Step1.Expert, downloadnode, imodal)
  847. if err == nil {
  848. f.ServeJson(rs)
  849. }
  850. case "Step2": //list
  851. if imodal == 2 { //chromedp测试
  852. //基础动作
  853. if f.Step2.Chrome != "" {
  854. chromeActions := []sp.ChromeActions{}
  855. rangeChromeActions := []sp.ChromeActions{}
  856. var result []interface{}
  857. //格式化基础动作
  858. if json.Unmarshal([]byte(f.Step2.Chrome), &chromeActions) != nil {
  859. f.ServeJson("chrome基础动作格式化错误")
  860. return
  861. }
  862. //格式化循环动作
  863. if f.Step2.RangeChrome != "" {
  864. if json.Unmarshal([]byte(f.Step2.RangeChrome), &rangeChromeActions) != nil {
  865. f.ServeJson("chrome循环动作格式化错误")
  866. return
  867. }
  868. }
  869. //拼装基础和循环动作
  870. if len(rangeChromeActions) > 0 && f.Step2.RangeTimes > 0 {
  871. for times := 1; times <= f.Step2.RangeTimes; times++ {
  872. chromeActions = append(chromeActions, rangeChromeActions...)
  873. }
  874. }
  875. chromeTask := sp.ChromeTask{
  876. TimeOut: 120, //测试默认两分钟
  877. Actions: chromeActions,
  878. }
  879. result = spider.DownloadByChrome(downloadnode, "", chromeTask, 150)
  880. resultMap := map[int]interface{}{}
  881. for i, r := range result {
  882. resultMap[i] = r
  883. }
  884. f.ServeJson(resultMap)
  885. } else {
  886. f.ServeJson("未进行动作编排")
  887. }
  888. } else {
  889. addrs := strings.Split(f.Step2.Listadds, "\n")
  890. if len(addrs) > 0 {
  891. for k, v := range addrs {
  892. addrs[k] = "'" + v + "'"
  893. }
  894. f.Step2.Listadds = strings.Join(addrs, ",")
  895. } else if len(f.Step2.Listadds) > 5 {
  896. f.Step2.Listadds = "'" + f.Step2.Listadds + "'"
  897. } else {
  898. f.Step2.Listadds = ""
  899. }
  900. list := []interface{}{
  901. f.Step2.Listadd,
  902. f.Step2.Listadds,
  903. f.Step2.BlockChooser,
  904. f.Step2.AddressChooser,
  905. f.Step2.TitleChooser,
  906. f.Step2.DateChooser,
  907. f.Step2.DateFormat,
  908. }
  909. listcheck = strings.Replace(listcheck, "\n", "\\n", -1)
  910. listcheck = strings.Replace(listcheck, "\"", "\\\"", -1)
  911. s_model := f.GetString("model")
  912. configModel := sp.Config.Model[s_model]
  913. model := map[string]interface{}{}
  914. for k, _ := range configModel {
  915. model[k] = f.GetString(k)
  916. }
  917. if script {
  918. _, script := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal, 1)
  919. f.ServeJson(script)
  920. return
  921. }
  922. rs, err := spider.GetPageList(common, list, model, listcheck, f.Step2.Expert, downloadnode, imodal)
  923. if err == nil {
  924. f.ServeJson(rs)
  925. } else if err.(error).Error() == "no" {
  926. f.ServeJson(rs[0])
  927. }
  928. }
  929. case "Step3": //detail
  930. if imodal == 2 { //chromedp测试
  931. chromeActions := []sp.ChromeActions{}
  932. var result []interface{}
  933. if json.Unmarshal([]byte(f.Step3.Chrome), &chromeActions) == nil {
  934. chromeTask := sp.ChromeTask{
  935. TimeOut: 120, //测试默认两分钟
  936. Actions: chromeActions,
  937. }
  938. result = spider.DownloadByChrome(downloadnode, "", chromeTask, 150)
  939. } else {
  940. result = append(result, "chrome task json 格式化错误")
  941. }
  942. resultMap := map[int]interface{}{}
  943. for i, r := range result {
  944. resultMap[i] = r
  945. }
  946. f.ServeJson(resultMap)
  947. } else {
  948. content := []interface{}{
  949. f.Step3.ContentChooser,
  950. f.Step3.ElementChooser,
  951. }
  952. contentcheck = strings.Replace(contentcheck, "\n", "\\n", -1)
  953. contentcheck = strings.Replace(contentcheck, "\"", "\\\"", -1)
  954. data := map[string]interface{}{}
  955. data["title"] = f.Step3.T_title
  956. data["href"] = f.Step3.T_href
  957. data["publishtime"] = f.Step3.T_date
  958. if script {
  959. _, script := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal, 1)
  960. f.ServeJson(script)
  961. return
  962. }
  963. rs, err := spider.GetContentInfo(common, content, data, contentcheck, f.Step3.Expert, downloadnode, imodal)
  964. if projectinfo, ok := rs["projectinfo"].(map[string]interface{}); ok && projectinfo != nil {
  965. if attachments, ok := projectinfo["attachments"].(map[string]interface{}); ok && attachments != nil {
  966. for _, tmp := range attachments {
  967. tmpMap := tmp.(map[string]interface{})
  968. if qu.ObjToString(tmpMap["filename"]) == "附件中含有乱码" {
  969. rs["msg"] = "附件中含有乱码"
  970. }
  971. }
  972. }
  973. }
  974. if err == nil {
  975. f.ServeJson(rs)
  976. } else {
  977. f.ServeJson(rs["no"])
  978. }
  979. }
  980. }
  981. }
  982. }
  983. // 爬虫测试数据json
  984. func (f *Front) GetJson() {
  985. code := f.GetString("code")
  986. username := f.GetSession("username").(string)
  987. if tr := TestResultMap[username+code]; tr != nil {
  988. task, _ := u.MgoEB.FindOne("task", map[string]interface{}{"code": code, "i_state": 3})
  989. comeintime := int64(0)
  990. if len(*task) > 0 {
  991. comeintime = qu.Int64All((*task)["l_comeintime"])
  992. tr.task_remark = "审核任务创建时间:" + qu.FormatDateByInt64(&comeintime, qu.Date_Short_Layout) + ";" + tr.task_remark
  993. }
  994. data := tr.dataInfo
  995. result := tr.listInfo
  996. if len(data) > 0 {
  997. data["contenthtml"] = ""
  998. }
  999. num := 0
  1000. list_fir := []map[string]interface{}{}
  1001. list_sec := []map[string]interface{}{}
  1002. for page, list := range result {
  1003. for k, v := range list {
  1004. v["a_index"] = k + 1
  1005. num++
  1006. }
  1007. if page == 1 {
  1008. list_fir = list
  1009. } else if page == 2 {
  1010. list_sec = list
  1011. }
  1012. }
  1013. f.T["list_fir"] = list_fir
  1014. f.T["list_sec"] = list_sec
  1015. f.T["data"] = data
  1016. f.T["num"] = num
  1017. f.T["descript"] = tr.task_descript
  1018. f.T["remark"] = tr.task_remark
  1019. f.T["rateremark"] = tr.task_rateremark
  1020. f.T["reason"] = tr.reason
  1021. f.T["msg"] = tr.msg
  1022. f.T["comeintime"] = comeintime
  1023. delete(TestResultMap, username+code)
  1024. }
  1025. f.Render("jsonInfo.html", &f.T)
  1026. }
  1027. var TestResultMap = map[string]*TestResult{} //username+code
  1028. // 某个爬虫整体测试结果
  1029. type TestResult struct {
  1030. task_remark string
  1031. task_rateremark []string
  1032. task_descript string
  1033. reason string
  1034. msg string
  1035. listInfo map[int64][]map[string]interface{}
  1036. dataInfo map[string]interface{}
  1037. }
  1038. // 整体测试
  1039. func (f *Front) SpiderPass() {
  1040. defer mu.Catch()
  1041. tr := &TestResult{}
  1042. result := map[int64][]map[string]interface{}{}
  1043. data := map[string]interface{}{}
  1044. msgArr := []string{}
  1045. code := f.GetString("code")
  1046. downloadnode := f.GetString("node")
  1047. //根据code查询待确认任务
  1048. query := map[string]interface{}{
  1049. "s_code": code,
  1050. "i_state": 3,
  1051. }
  1052. task, _ := u.MgoEB.FindOne("task", query)
  1053. descript := "null"
  1054. remark := "null"
  1055. remarktmp := []string{}
  1056. rateremarktmp := []string{}
  1057. if len(*task) > 0 {
  1058. descript = (*task)["s_descript"].(string)
  1059. if mrecord, ok := (*task)["a_mrecord"].([]interface{}); ok {
  1060. for _, m := range mrecord {
  1061. remarkInfo := m.(map[string]interface{})
  1062. if remark := qu.ObjToString(remarkInfo["s_mrecord_remark"]); remark != "" {
  1063. remarktmp = append(remarktmp, remark+";")
  1064. }
  1065. if rateremark := qu.ObjToString(remarkInfo["s_mrecord_rateremark"]); rateremark != "" {
  1066. rateremarktmp = append(rateremarktmp, rateremark+";")
  1067. }
  1068. }
  1069. }
  1070. }
  1071. if len(remarktmp) > 0 {
  1072. remark = ""
  1073. remark = strings.Join(remarktmp, "")
  1074. }
  1075. tr.task_remark = remark
  1076. tr.task_rateremark = rateremarktmp
  1077. tr.task_descript = descript
  1078. //基本信息、方法一(发布时间)、方法二(列表页)、方法三(详情页)、总请求次数、go方法一、go方法二、go方法三、列表页条数
  1079. steps := []interface{}{false, false, false, false, 0, 0, 0, 0, 0}
  1080. one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
  1081. reason, _ := (*one)["reason"].(string)
  1082. tr.reason = reason
  1083. if len(*one) > 0 && (*one)["oldlua"] == nil {
  1084. common := (*one)["param_common"].([]interface{})
  1085. if len(common) < 13 {
  1086. f.ServeJson(steps)
  1087. return
  1088. } else {
  1089. steps[0] = true
  1090. }
  1091. } else {
  1092. steps[0] = true
  1093. }
  1094. script, liststr, contentstr := "", "", ""
  1095. if (*one)["oldlua"] == nil {
  1096. script, liststr, contentstr = spider.GetScriptByCode(code)
  1097. } else {
  1098. script = (*one)["luacontent"].(string)
  1099. }
  1100. if liststr != "" && contentstr != "" {
  1101. msgArr = u.SpiderPassCheckLua(liststr, contentstr, (*one)) //校验
  1102. }
  1103. s := spider.CreateSpider(downloadnode, script)
  1104. s.SpiderMaxPage = 2 //采集列表页总页数
  1105. s.Timeout = 60
  1106. timestr, timeerr := s.GetLastPublishTime()
  1107. if timeerr == nil && len(timestr) > 4 {
  1108. steps[1] = true //发布时间获取成功
  1109. downloadNum := 0
  1110. result, downloadNum, _ = s.DownListPageItem() //列表页采集结果
  1111. if downloadNum == 0 {
  1112. f.ServeJson(steps)
  1113. return
  1114. }
  1115. steps[2] = true //列表页获取成功
  1116. steps[8] = downloadNum //下载量
  1117. tr.listInfo = result
  1118. if s.DownDetail {
  1119. onePageList := result[1] //第一页数据
  1120. if onePageDataNum := len(onePageList); onePageDataNum > 0 {
  1121. index := onePageDataNum / 2 //取一条数据下载三级页
  1122. param := map[string]string{}
  1123. for k, v := range onePageList[index] {
  1124. param[k] = qu.ObjToString(v)
  1125. }
  1126. data = map[string]interface{}{}
  1127. s.DownloadDetailPage(param, data)
  1128. tr.dataInfo = data
  1129. if len(data) == 0 || qu.ObjToString(data["detail"]) == "" {
  1130. steps[3] = false //详情页获取失败
  1131. } else {
  1132. steps[3] = true //详情页获取成功
  1133. }
  1134. }
  1135. } else {
  1136. steps[3] = true //详情页获取成功
  1137. }
  1138. //list, _ = s.DownListPageItem()
  1139. //for _, l := range list {
  1140. // if publishtime := qu.ObjToString(l["publishtime"]); publishtime == "0" || publishtime == "" {
  1141. // msgArr = append(msgArr, "列表页publishtime取值异常")
  1142. // break
  1143. // } else {
  1144. // t, err := time.ParseInLocation(qu.Date_Full_Layout, publishtime, time.Local)
  1145. // if err != nil || t.Unix() <= 0 {
  1146. // msgArr = append(msgArr, "列表页publishtime取值异常")
  1147. // break
  1148. // }
  1149. // }
  1150. //}
  1151. //if len(list) > 0 {
  1152. // tr.listInfo = list
  1153. // listone := list[0]
  1154. // if len(qu.ObjToString(listone["href"])) < 7 ||
  1155. // (qu.ObjToString(listone["publishtime"]) != "0" && len(qu.ObjToString(listone["publishtime"])) < 5) ||
  1156. // len(qu.ObjToString(listone["title"])) < 3 {
  1157. // f.ServeJson(steps)
  1158. // return
  1159. // } else {
  1160. // steps[2] = true
  1161. // if s.DownDetail {
  1162. // param := map[string]string{}
  1163. // index := 0
  1164. // if len(list) > 0 {
  1165. // steps[8] = len(list)
  1166. // index = len(list) / 2
  1167. // for k, v := range list[index] {
  1168. // param[k] = qu.ObjToString(v)
  1169. // }
  1170. // data = map[string]interface{}{}
  1171. // s.DownloadDetailPage(param, data)
  1172. // if len(data) > 0 {
  1173. // tr.dataInfo = data
  1174. // }
  1175. // if len(data) == 0 || data["detail"].(string) == "" {
  1176. // steps[3] = false
  1177. // } else {
  1178. // steps[3] = true
  1179. // }
  1180. // }
  1181. // } else {
  1182. // steps[3] = true
  1183. // }
  1184. // }
  1185. //}
  1186. }
  1187. //关闭laustate
  1188. s.L.Close()
  1189. steps[4] = s.Test_luareqcount
  1190. steps[5] = s.Test_goreqtime
  1191. steps[6] = s.Test_goreqlist
  1192. steps[7] = s.Test_goreqcon
  1193. //校验
  1194. param_common, _ := (*one)["param_common"].([]interface{})
  1195. if qu.IntAll(param_common[5]) == 1 && len(result) > 1 { //页码为1,但能翻页
  1196. msgArr = append(msgArr, "爬虫可以翻页,最大页为1是否合适")
  1197. }
  1198. msg := u.SpiderPassCheckListAndDetail(result, data)
  1199. msgArr = append(msgArr, msg...)
  1200. username := f.GetSession("username").(string)
  1201. tr.msg = strings.Join(msgArr, ";")
  1202. TestResultMap[username+code] = tr
  1203. f.ServeJson(steps)
  1204. }
  1205. func (f *Front) DownSpider(code string) {
  1206. auth := qu.IntAll(f.GetSession("auth"))
  1207. user := f.GetSession("loginuser")
  1208. success := false
  1209. script := ""
  1210. if auth > u.Role_Dev {
  1211. success = true
  1212. //one, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
  1213. one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
  1214. createuserid := qu.ObjToString((*one)["createuserid"])
  1215. filename := code + ".lua"
  1216. if len(*one) > 0 {
  1217. if (*one)["oldlua"] != nil {
  1218. if (*one)["luacontent"] != nil {
  1219. script = (*one)["luacontent"].(string)
  1220. }
  1221. } else {
  1222. user, _ := u.MgoEB.FindById("user", createuserid, nil)
  1223. name := (*one)["createuser"]
  1224. email := (*user)["s_email"]
  1225. upload := time.Now().Format("2006-01-02 15:04:05")
  1226. script, _, _ = spider.GetScriptByCode(code, name, email, upload)
  1227. }
  1228. }
  1229. f.ResponseWriter.Header().Del("Content-Type")
  1230. f.ResponseWriter.Header().Add("Content-Type", "application/x-download")
  1231. f.ResponseWriter.Header().Add("Content-Disposition", "attachment;filename=spider_"+filename)
  1232. f.WriteBytes([]byte(script))
  1233. } else {
  1234. f.Write("您没有权限")
  1235. }
  1236. //记录日志
  1237. downlogs := map[string]interface{}{
  1238. "code": code,
  1239. "user": user,
  1240. "auth": auth,
  1241. "time": time.Now().Unix(),
  1242. "success": success,
  1243. "script": script,
  1244. }
  1245. u.MgoEB.Save("luadownlogs", downlogs)
  1246. }
  1247. //下架删除download数据
  1248. //func delDownloadData(code string) bool {
  1249. // return mgu.Del("download", "spider", "spider", `{"code":"`+code+`"}`)
  1250. //}
  1251. //批量作废删除download数据
  1252. //func disableDelDownloadData(code []string) {
  1253. // for _, v := range code {
  1254. // flag := delDownloadData(v)
  1255. // log.Println(code, "---批量删除download数据:", flag)
  1256. // }
  1257. //}
  1258. // 爬虫核对
  1259. func (f *Front) Checktime() {
  1260. code := f.GetString("code")
  1261. auth := qu.IntAll(f.GetSession("auth"))
  1262. if auth != u.Role_Admin {
  1263. f.ServeJson(false)
  1264. } else {
  1265. //b := u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{
  1266. // "l_checktime": time.Now().Unix(),
  1267. //}}, true, false)
  1268. b := u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{
  1269. "l_checktime": time.Now().Unix(),
  1270. }}, true, false)
  1271. f.ServeJson(b)
  1272. }
  1273. }
  1274. // 批量作废
  1275. func (f *Front) Disables() error {
  1276. auth := qu.IntAll(f.GetSession("auth"))
  1277. events := strings.Split(f.GetString("events"), ",")
  1278. codes := strings.Split(f.GetString("codes"), ",")
  1279. disablereason := f.GetString("disablereason")
  1280. state, _ := f.GetInteger("state")
  1281. res := ""
  1282. if auth == u.Role_Admin {
  1283. for k, code := range codes {
  1284. //更新爬虫状态时爬虫下架
  1285. upresult, err := spider.UpdateSpiderByCodeState(code, "6", qu.IntAll(events[k])) //下架
  1286. qu.Debug("下架:", code, upresult)
  1287. if upresult && err == nil { //下架成功
  1288. //更新爬虫
  1289. u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"state": state, "disablereason": disablereason}}, false, false)
  1290. //修改任务状态
  1291. u.MgoEB.Update("task", map[string]interface{}{
  1292. "s_code": code,
  1293. "i_state": map[string]interface{}{
  1294. "$nin": []int{4, 6}, //更新除审核通过和已关闭的任务
  1295. },
  1296. },
  1297. map[string]interface{}{"$set": map[string]interface{}{"i_state": 6, "l_updatetime": time.Now().Unix()}}, false, true)
  1298. if err != nil {
  1299. res = res + code + ", OK" + qu.ObjToString(err.Error()) + ";"
  1300. } else {
  1301. res = res + code + ", OK" + ";"
  1302. }
  1303. } else {
  1304. res = res + code + ", 更新失败;"
  1305. }
  1306. }
  1307. } else {
  1308. res = "没有权限"
  1309. }
  1310. f.ServeJson(res)
  1311. return nil
  1312. }
  1313. // 批量上下架
  1314. func (f *Front) BatchShelves() {
  1315. codes := strings.Split(f.GetString("codes"), ",")
  1316. state, _ := f.GetInteger("state")
  1317. auth := qu.IntAll(f.GetSession("auth"))
  1318. errCode := []string{}
  1319. var err error
  1320. b := false
  1321. if IsHasUpState(auth, Sp_state_5) {
  1322. if state == 5 { //批量上架
  1323. for _, code := range codes {
  1324. if _, ok := u.CodeTimeCache.Get(code); ok { //避免短时间内重复上架
  1325. errCode = append(errCode, code+"重复上架")
  1326. } else {
  1327. u.CodeTimeCache.Set(code, "", time.Minute*2) //上架时,内存记录上架爬虫
  1328. _, err = UpStateAndUpSpider(code, "", "", "", Sp_state_5)
  1329. if err != nil {
  1330. errCode = append(errCode, code)
  1331. } else {
  1332. u.CodeTimeCache.Del(code) //上架成功后,删除内存记录
  1333. }
  1334. }
  1335. }
  1336. } else { //批量下架
  1337. for _, code := range codes {
  1338. b, err = UpStateAndUpSpider(code, "", "", "", Sp_state_6)
  1339. if !b || err != nil {
  1340. errCode = append(errCode, code)
  1341. }
  1342. //下架删除download数据
  1343. //if b {
  1344. // flag := delDownloadData(code)
  1345. // log.Println(code, "---删除download数据:", flag)
  1346. //}
  1347. }
  1348. }
  1349. } else {
  1350. errCode = append(errCode, "没有权限")
  1351. }
  1352. f.ServeJson(errCode)
  1353. }
  1354. // 更新爬虫状态
  1355. func (f *Front) UpState() error {
  1356. username := f.GetSession("username").(string)
  1357. code := f.GetString("code")
  1358. state, _ := f.GetInt("state")
  1359. id := f.GetString("taskId")
  1360. reason := f.GetString("reason")
  1361. auth := qu.IntAll(f.GetSession("auth"))
  1362. var codeArr = []string{code}
  1363. var taskid []string
  1364. //修改任务状态
  1365. istotask := false
  1366. res := map[string]interface{}{
  1367. "istotask": istotask,
  1368. "err": "没有权限",
  1369. "code": sp.Se.Encode2Hex(code),
  1370. "taskid": taskid,
  1371. }
  1372. var xgTime int64
  1373. if f.GetSession(id) == nil || f.GetSession(id) == "" {
  1374. xgTime = time.Now().Unix()
  1375. } else {
  1376. xgTimeStr := qu.ObjToString(f.GetSession(id))
  1377. xgTimeTmp, _ := time.ParseInLocation("2006-01-02 15:04:05", xgTimeStr, time.Local)
  1378. xgTime = xgTimeTmp.Unix()
  1379. }
  1380. f.DelSession(id)
  1381. if IsHasUpState(auth, int(state)) {
  1382. b, err := UpStateAndUpSpider(code, "", reason, username, int(state)) //更新爬虫状态
  1383. if b && state == Sp_state_1 { //提交审核
  1384. //有对应任务跳转提交记录页
  1385. taskid = CheckTask(codeArr, 1)
  1386. if len(taskid) > 0 {
  1387. res["istotask"] = true
  1388. res["taskid"] = taskid[0]
  1389. }
  1390. } else if b && state == Sp_state_2 { //打回
  1391. taskid = CheckTask(codeArr, 2)
  1392. if len(taskid) > 0 {
  1393. //UpTaskState([]string{taskid}, 2) //修改状态
  1394. UpTaskState(taskid, 2, "", int64(0)) //修改任务状态
  1395. SaveRemark(taskid, reason, username) //保存记录信息
  1396. }
  1397. } else if b && state == Sp_state_3 { //审核通过
  1398. taskid = CheckTask(codeArr, 3)
  1399. if len(taskid) > 0 {
  1400. //UpTaskState([]string{taskid}, 3)
  1401. UpTaskState(taskid, 3, "", int64(0))
  1402. SaveRemark(taskid, "", username)
  1403. }
  1404. } else if b && state == Sp_state_6 { //下架
  1405. //下架成功删除心跳数据
  1406. flag := DelSpiderHeart(code)
  1407. log.Println(code, "---下架删除download数据:", flag)
  1408. } else if b && state == Sp_state_7 { //反馈
  1409. taskid = CheckTask(codeArr, 7)
  1410. if len(taskid) > 0 {
  1411. UpTaskState(taskid, 7, reason, xgTime)
  1412. }
  1413. }
  1414. if err != nil {
  1415. res["err"] = err.Error()
  1416. f.ServeJson(res)
  1417. } else {
  1418. res["err"] = ""
  1419. f.ServeJson(res)
  1420. }
  1421. } else {
  1422. f.ServeJson(res)
  1423. }
  1424. return nil
  1425. }
  1426. func (f *Front) Assort() {
  1427. state, _ := f.GetInteger("state")
  1428. code := f.GetString("code")
  1429. codes := u.SymbolReg.Split(code, -1)
  1430. success := true
  1431. msg := ""
  1432. for _, code := range codes {
  1433. query := map[string]interface{}{
  1434. "code": code,
  1435. }
  1436. //下架爬虫
  1437. //lua, _ := u.MgoE.FindOne("luaconfig", query)
  1438. lua, _ := u.MgoEB.FindOne("luaconfig", query)
  1439. event := qu.IntAll((*lua)["event"])
  1440. if (*lua)["downevent"] != nil { //爬虫开发修改爬虫节点,审核人员分类爬虫时,原来爬虫所在节点下架
  1441. event = qu.IntAll((*lua)["downevent"])
  1442. }
  1443. upresult, err := spider.UpdateSpiderByCodeState(code, "6", event)
  1444. qu.Debug("下架爬虫:", code, upresult, err)
  1445. if upresult && err == nil {
  1446. //更新爬虫状态
  1447. update := map[string]interface{}{
  1448. "$set": map[string]interface{}{
  1449. "state": state,
  1450. //"modifytime": time.Now().Unix(),
  1451. "l_uploadtime": time.Now().Unix(),
  1452. },
  1453. }
  1454. //u.MgoE.Update("luaconfig", query, update, false, false)
  1455. u.MgoEB.Update("luaconfig", query, update, false, false)
  1456. //关闭任务
  1457. query = map[string]interface{}{
  1458. "s_code": code,
  1459. }
  1460. update = map[string]interface{}{
  1461. "$set": map[string]interface{}{
  1462. "i_state": 6,
  1463. },
  1464. }
  1465. u.MgoEB.Update("task", query, update, false, true)
  1466. //删除心跳
  1467. DelSpiderHeart(code)
  1468. } else {
  1469. success = false
  1470. msg += code + ";"
  1471. }
  1472. }
  1473. f.ServeJson(map[string]interface{}{"success": success, "msg": msg})
  1474. }
  1475. // 更新爬虫状态,并判断是否更新节点爬虫
  1476. func UpStateAndUpSpider(code, id, reason, username string, state int) (bool, error) {
  1477. upresult := false
  1478. var err error
  1479. one := &map[string]interface{}{}
  1480. if code != "" {
  1481. //one, _ = u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
  1482. one, _ = u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
  1483. } else {
  1484. //one, _ = u.MgoE.FindById("luaconfig", id, nil)
  1485. one, _ = u.MgoEB.FindById("luaconfig", id, nil)
  1486. code = qu.ObjToString((*one)["code"])
  1487. }
  1488. if len(*one) > 0 {
  1489. var event int
  1490. unset := map[string]interface{}{}
  1491. if (*one)["event"] != nil {
  1492. event = qu.IntAll((*one)["event"])
  1493. } else {
  1494. for k, _ := range sp.Config.Uploadevents { //
  1495. event = qu.IntAll(k)
  1496. break
  1497. }
  1498. //r := rand.New(rand.NewSource(time.Now().UnixNano()))
  1499. //event = sp.Config.Uploadevents[r.Intn(len(sp.Config.Uploadevents))]
  1500. }
  1501. //oldstate := qu.IntAll(one["state"])
  1502. switch state {
  1503. case Sp_state_4, Sp_state_6: //作废、下架
  1504. // if oldstate == Sp_state_5 {
  1505. // upresult = false
  1506. // err = errors.New("已上架不允许作废")
  1507. // } else {
  1508. // upresult = true
  1509. // }
  1510. upresult, err = spider.UpdateSpiderByCodeState(code, fmt.Sprint(state), event) //下架
  1511. qu.Debug("下架:", upresult, code)
  1512. case Sp_state_5: //上架(爬虫端在更新上架的时候为了更新内存中字段,采用先下架上架)
  1513. if downevent := qu.IntAll((*one)["downevent"]); downevent != 0 { //爬虫开发修改爬虫节点,审核人员上架爬虫时,原来爬虫所在节点下架
  1514. upresult, err = spider.UpdateSpiderByCodeState(code, "6", downevent)
  1515. qu.Debug(code, "下架历史节点:", downevent)
  1516. if upresult && err == nil {
  1517. unset = map[string]interface{}{"downevent": ""}
  1518. }
  1519. } else {
  1520. upresult, err = spider.UpdateSpiderByCodeState(code, "6", event)
  1521. }
  1522. qu.Debug("下架:", upresult, code, event)
  1523. if upresult && err == nil {
  1524. upresult, err = spider.UpdateSpiderByCodeState(code, fmt.Sprint(state), event)
  1525. qu.Debug("上架:", upresult, code, event)
  1526. }
  1527. case Sp_state_3: //审核通过
  1528. //校验爬虫三级页是否有附件下载方法
  1529. str_content := qu.ObjToString((*one)["str_content"])
  1530. if !strings.Contains(str_content, "downloadFile") && !strings.Contains(str_content, "getFileAttachmentsArrayWithTag") {
  1531. err = errors.New("三级页缺少下载附件方法")
  1532. } else {
  1533. upresult = true
  1534. }
  1535. default:
  1536. upresult = true
  1537. err = nil
  1538. }
  1539. if err != nil && strings.Contains(err.Error(), "timeout") {
  1540. err = errors.New("连接节点" + fmt.Sprint(event) + "超时")
  1541. upresult = true
  1542. }
  1543. if upresult && err == nil {
  1544. upset := map[string]interface{}{"state": state} //修改状态
  1545. if (*one)["oldlua"] != nil { //老脚本上传
  1546. //upresult = u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, true, false)
  1547. up := map[string]interface{}{
  1548. "$set": upset,
  1549. }
  1550. if len(unset) > 0 {
  1551. up["$unset"] = unset
  1552. }
  1553. upresult = u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, up, true, false)
  1554. } else {
  1555. if state == Sp_state_1 { //提交审核
  1556. upset["l_complete"] = time.Now().Unix()
  1557. upset["report"] = ""
  1558. UpdateSiteTask(code) //更新重点站点任务
  1559. } else if state == Sp_state_7 { //反馈问题
  1560. upset["report"] = reason
  1561. upset["state"] = 1 //反馈后爬虫改为待审核
  1562. upset["l_complete"] = time.Now().Unix()
  1563. } else if state == Sp_state_3 { //审核通过
  1564. if (*one)["event"] == nil {
  1565. upset["event"] = event
  1566. //upset["modifytime"] = time.Now().Unix()
  1567. }
  1568. upset["frequencyerrtimes"] = 0 //爬虫审核通过,重置采集频率异常次数
  1569. upset["l_uploadtime"] = time.Now().Unix()
  1570. } else if state == Sp_state_2 { //打回原因
  1571. upset["reason"] = reason
  1572. //清理机检结果
  1573. unset["check"] = ""
  1574. unset["checkok"] = ""
  1575. unset["checktime"] = ""
  1576. LuaCheckSaveLog(one) //保存机检日志
  1577. } else if state == Sp_state_5 { //上架
  1578. upset["l_checktime"] = 0 //核对时间重置
  1579. //清理机检结果
  1580. unset["check"] = ""
  1581. unset["checkok"] = ""
  1582. unset["checktime"] = ""
  1583. LuaCheckSaveLog(one) //保存机检日志
  1584. LuaSaveLog(code, username, one, 1)
  1585. go ModifyLogs_SaveCodeLogs(code, *one)
  1586. } else if state == Sp_state_4 { //作废,作废原因
  1587. upset["disablereason"] = reason
  1588. //upset["modifytime"] = time.Now().Unix()
  1589. upset["l_uploadtime"] = time.Now().Unix() //l_complete爬虫完成时间
  1590. }
  1591. up := map[string]interface{}{
  1592. "$set": upset,
  1593. }
  1594. if len(unset) > 0 {
  1595. up["$unset"] = unset
  1596. }
  1597. //upresult = u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, map[string]interface{}{"$set": upset}, false, false)
  1598. upresult = u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, up, false, false)
  1599. qu.Debug("提交日志:", code, upset, upresult)
  1600. if upresult && (state == Sp_state_2 || state == Sp_state_3) { //打回、审核记录日志
  1601. types := "打回"
  1602. if state == Sp_state_3 {
  1603. types = "审核"
  1604. }
  1605. event := qu.IntAll((*one)["event"])
  1606. obj := map[string]interface{}{
  1607. "code": code,
  1608. "auditor": username,
  1609. "types": types,
  1610. "comeintime": time.Now().Unix(),
  1611. "reason": reason,
  1612. "spideruser": (*one)["createuser"],
  1613. "modifytime": (*one)["modifytime"],
  1614. "event": event,
  1615. "site": (*one)["site"],
  1616. "channel": (*one)["channel"],
  1617. }
  1618. if !strings.HasSuffix(code, u.Bu) { //凡是以_bu结尾的爬虫一律不计入审核记录
  1619. //新爬虫审核记录表
  1620. if event == 7000 && (state == Sp_state_3 || state == Sp_state_2) {
  1621. count := u.MgoEB.Count("lua_logs_auditor", map[string]interface{}{"code": code, "types": "审核"})
  1622. if count == 0 { //新爬虫审核记录
  1623. u.MgoEB.Save("lua_logs_auditor_new", obj)
  1624. }
  1625. }
  1626. u.MgoEB.Save("lua_logs_auditor", obj) //历史维护爬虫审核记录
  1627. }
  1628. }
  1629. }
  1630. }
  1631. }
  1632. return upresult, err
  1633. }
  1634. // 保存记录信息
  1635. func SaveRemark(taskid []string, reason, username string) {
  1636. timeNow := time.Now().Unix()
  1637. if reason == "" {
  1638. reason = "审核通过"
  1639. }
  1640. for _, id := range taskid {
  1641. task, _ := u.MgoEB.FindById("task", id, nil)
  1642. if task != nil && len(*task) > 0 {
  1643. checkData := (*task)["a_check"]
  1644. var checkArr []map[string]interface{}
  1645. newData := make(map[string]interface{})
  1646. newData["s_check_checkUser"] = username
  1647. newData["l_check_checkTime"] = timeNow
  1648. newData["s_check_checkRemark"] = reason
  1649. if checkData != nil {
  1650. myArr := qu.ObjArrToMapArr(checkData.([]interface{}))
  1651. if myArr != nil && len(myArr) > 0 {
  1652. for _, v := range myArr {
  1653. checkArr = append(checkArr, v)
  1654. }
  1655. }
  1656. }
  1657. checkArr = append(checkArr, newData)
  1658. (*task)["a_check"] = checkArr
  1659. u.MgoEB.UpdateById("task", id, map[string]interface{}{"$set": &task})
  1660. }
  1661. }
  1662. }
  1663. // 修改任务状态
  1664. func UpTaskState(code []string, num int, reason string, startTime int64) {
  1665. query := map[string]interface{}{}
  1666. update := map[string]interface{}{}
  1667. for _, v := range code {
  1668. if num == 1 || num == 2 || num == 3 || num == 7 { //id
  1669. query = map[string]interface{}{
  1670. "_id": mongodb.StringTOBsonId(v),
  1671. }
  1672. } else {
  1673. query = map[string]interface{}{ //code
  1674. "s_code": v,
  1675. }
  1676. }
  1677. if num == 1 { //提交审核
  1678. update = map[string]interface{}{
  1679. "$set": map[string]interface{}{
  1680. "i_state": 3,
  1681. },
  1682. }
  1683. } else if num == 2 { //打回 -->未通过
  1684. update = map[string]interface{}{
  1685. "$set": map[string]interface{}{
  1686. "i_state": 5,
  1687. "l_updatetime": time.Now().Unix(),
  1688. },
  1689. }
  1690. } else if num == 3 { //发布(审核通过) -->审核通过
  1691. update = map[string]interface{}{
  1692. "$set": map[string]interface{}{
  1693. "i_state": 4,
  1694. "l_updatetime": time.Now().Unix(),
  1695. "l_uploadtime": time.Now().Unix(),
  1696. },
  1697. }
  1698. } else if num == 4 { //批量作废 -->关闭
  1699. update = map[string]interface{}{
  1700. "$set": map[string]interface{}{
  1701. "i_state": 6,
  1702. "l_complete": time.Now().Unix(),
  1703. "l_updatetime": time.Now().Unix(),
  1704. },
  1705. }
  1706. } else if num == 7 { //反馈信息 -->待审核
  1707. newData := map[string]interface{}{
  1708. "l_mrecord_comeintime": startTime,
  1709. "l_mrecord_complete": time.Now().Unix(),
  1710. "s_mrecord_remark": reason,
  1711. }
  1712. mrecord := []interface{}{}
  1713. mrecord = append(mrecord, newData)
  1714. update = map[string]interface{}{
  1715. "$set": map[string]interface{}{
  1716. "i_state": 3,
  1717. "l_complete": time.Now().Unix(),
  1718. "a_mrecord": mrecord,
  1719. "l_updatetime": time.Now().Unix(),
  1720. },
  1721. }
  1722. }
  1723. flag := u.MgoEB.Update("task", query, update, false, true)
  1724. log.Println("codeOrId:", query, " 修改任务状态:", flag)
  1725. }
  1726. }
  1727. // 更新节点
  1728. func (f *Front) ChangeEvent() {
  1729. auth := qu.IntAll(f.GetSession("auth"))
  1730. if auth != u.Role_Admin {
  1731. f.ServeJson("没有权限")
  1732. }
  1733. code := f.GetString("code")
  1734. event, _ := f.GetInt("event")
  1735. eventok := false
  1736. for k, _ := range sp.Config.Uploadevents {
  1737. if event == qu.Int64All(k) {
  1738. eventok = true
  1739. break
  1740. }
  1741. }
  1742. if !eventok {
  1743. f.ServeJson("没有对应节点")
  1744. return
  1745. }
  1746. //info, _ := u.MgoE.FindOne("luaconfig", map[string]interface{}{"code": code})
  1747. info, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
  1748. if len(*info) > 0 {
  1749. oldevent := qu.IntAll((*info)["event"])
  1750. if qu.IntAll((*info)["state"]) == Sp_state_5 {
  1751. //源节点下架
  1752. _, err := spider.UpdateSpiderByCodeState(code, fmt.Sprint(Sp_state_6), oldevent)
  1753. set := map[string]interface{}{
  1754. "$set": map[string]interface{}{
  1755. "event": qu.IntAll(event),
  1756. "state": Sp_state_6,
  1757. },
  1758. }
  1759. //u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
  1760. u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
  1761. if err != nil && strings.Contains(err.Error(), "timeout") {
  1762. f.ServeJson("连接节点" + fmt.Sprint(oldevent) + "超时")
  1763. } else {
  1764. f.ServeJson(err.Error())
  1765. }
  1766. } else {
  1767. set := map[string]interface{}{
  1768. "$set": map[string]interface{}{
  1769. "event": qu.IntAll(event),
  1770. },
  1771. }
  1772. //u.MgoE.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
  1773. u.MgoEB.Update("luaconfig", map[string]interface{}{"code": code}, set, true, false)
  1774. }
  1775. } else {
  1776. f.ServeJson("没有对应记录")
  1777. }
  1778. }
  1779. // 验证用户是否有更改状态权限
  1780. func IsHasUpState(auth, state int) bool {
  1781. rep := false
  1782. switch auth {
  1783. case u.Role_Dev:
  1784. if state == Sp_state_1 || state == Sp_state_7 {
  1785. rep = true
  1786. }
  1787. case u.Role_Examine:
  1788. if state == Sp_state_2 || state == Sp_state_3 {
  1789. rep = true
  1790. }
  1791. case u.Role_Admin:
  1792. rep = true
  1793. default:
  1794. }
  1795. return rep
  1796. }
  1797. var list_fields = `{"_id":1,"code":1,"createuser":1,"modifyuser":1,"modifytime":1,"l_uploadtime":1,"l_checktime":1,"state":1,"param_common":1,"event":1,"urgency":1,"platform":1,"pendstate":1}`
  1798. // 脚本管理,结合爬虫运行信息
  1799. func (f *Front) LuaList() {
  1800. auth := qu.IntAll(f.GetSession("auth"))
  1801. if auth != u.Role_Admin {
  1802. f.ServeJson("没有权限!")
  1803. return
  1804. }
  1805. if f.Method() == "POST" {
  1806. state, _ := f.GetInteger("state")
  1807. event, _ := f.GetInteger("event")
  1808. start, _ := f.GetInteger("start")
  1809. limit, _ := f.GetInteger("length")
  1810. draw, _ := f.GetInteger("draw")
  1811. searchStr := f.GetString("search[value]")
  1812. //search := strings.Replace(searchStr, " ", "", -1)
  1813. search := strings.TrimSpace(searchStr)
  1814. platform := f.GetString("platform")
  1815. query := map[string]interface{}{}
  1816. queryArr := []interface{}{}
  1817. //搜索条件
  1818. if search != "" {
  1819. q1 := map[string]interface{}{}
  1820. q1["$or"] = []interface{}{
  1821. map[string]interface{}{"code": map[string]interface{}{"$regex": search}},
  1822. map[string]interface{}{"createuser": map[string]interface{}{"$regex": search}},
  1823. map[string]interface{}{"param_common.1": map[string]interface{}{"$regex": search}},
  1824. }
  1825. queryArr = append(queryArr, q1)
  1826. }
  1827. //爬虫状态
  1828. q2 := map[string]interface{}{}
  1829. if state > -1 {
  1830. q2 = map[string]interface{}{"state": state}
  1831. } else {
  1832. q2 = map[string]interface{}{
  1833. "state": map[string]interface{}{
  1834. "$in": []int{Sp_state_3, Sp_state_5, Sp_state_6},
  1835. },
  1836. }
  1837. }
  1838. queryArr = append(queryArr, q2)
  1839. //爬虫节点
  1840. q3 := map[string]interface{}{}
  1841. if event > -1 {
  1842. q3 = map[string]interface{}{"event": event}
  1843. queryArr = append(queryArr, q3)
  1844. }
  1845. //爬虫平台
  1846. q4 := map[string]interface{}{}
  1847. if platform != "-1" {
  1848. q4 = map[string]interface{}{"platform": platform}
  1849. queryArr = append(queryArr, q4)
  1850. }
  1851. query["$and"] = queryArr
  1852. sort := `{"%s":%d}`
  1853. orderIndex := f.GetString("order[0][column]")
  1854. orderName := f.GetString(fmt.Sprintf("columns[%s][data]", orderIndex))
  1855. orderType := 1
  1856. if f.GetString("order[0][dir]") != "asc" {
  1857. orderType = -1
  1858. }
  1859. sort = fmt.Sprintf(sort, orderName, orderType)
  1860. page := start / 10
  1861. //luas, _ := u.MgoE.Find("luaconfig", query, sort, list_fields, false, start, limit)
  1862. //count := u.MgoE.Count("luaconfig", query)
  1863. luas, _ := u.MgoEB.Find("luaconfig", query, sort, list_fields, false, start, limit)
  1864. count := u.MgoEB.Count("luaconfig", query)
  1865. qu.Debug("query:", query, start, limit, count, len(*luas))
  1866. for k, v := range *luas {
  1867. v["num"] = k + 1 + page*10
  1868. l_uploadtime := qu.Int64All(v["l_uploadtime"])
  1869. v["l_uploadtime"] = qu.FormatDateByInt64(&l_uploadtime, qu.Date_Full_Layout)
  1870. l_checktime := qu.Int64All(v["l_checktime"])
  1871. v["l_checktime"] = qu.FormatDateByInt64(&l_checktime, qu.Date_Full_Layout)
  1872. if l_checktime > 0 { //核对
  1873. v["is_check"] = true
  1874. } else { //未核对
  1875. v["is_check"] = false
  1876. }
  1877. if tmp, ok := spinfos.Load(v["code"]); ok {
  1878. info := tmp.(*spinfo)
  1879. v["modifytime"] = info.lastHeartbeat
  1880. v["yesterday"] = fmt.Sprint(info.yesterdayDowncount) + "/" + fmt.Sprint(info.yestoDayRequestNum)
  1881. v["terday"] = fmt.Sprint(info.todayDowncount) + "/" + fmt.Sprint(info.toDayRequestNum)
  1882. v["lastdowncount"] = info.lastDowncount
  1883. v["lstate"] = info.lstate
  1884. } else {
  1885. v["modifytime"] = ""
  1886. v["yesterday"] = ""
  1887. v["terday"] = ""
  1888. v["lastdowncount"] = 0
  1889. v["lstate"] = ""
  1890. }
  1891. }
  1892. f.ServeJson(map[string]interface{}{"draw": draw, "data": luas, "recordsFiltered": count, "recordsTotal": count})
  1893. } else {
  1894. events := []string{}
  1895. for k, _ := range sp.Config.Uploadevents {
  1896. events = append(events, k)
  1897. }
  1898. sort.Strings(events)
  1899. f.T["events"] = events
  1900. f.Render("lualist.html", &f.T)
  1901. }
  1902. }
  1903. // 查看是否有该任务
  1904. func CheckTask(codes []string, num int) []string {
  1905. // var id string = ""
  1906. query := map[string]interface{}{}
  1907. var idArr []string
  1908. if len(codes) > 0 {
  1909. for _, v := range codes {
  1910. if num == 1 {
  1911. query = map[string]interface{}{
  1912. "s_code": v,
  1913. "i_state": map[string]interface{}{
  1914. "$in": []int{1, 2, 5},
  1915. },
  1916. }
  1917. } else if num == 2 { //打回时查询待审核的任务
  1918. query = map[string]interface{}{
  1919. "s_code": v,
  1920. "i_state": 3,
  1921. }
  1922. } else if num == 3 { //审核通过时查询待处理、处理中、待审核、未通过的任务
  1923. query = map[string]interface{}{
  1924. "s_code": v,
  1925. "i_state": map[string]interface{}{
  1926. "$in": []int{1, 2, 3, 5},
  1927. },
  1928. }
  1929. } else if num == 7 {
  1930. query = map[string]interface{}{
  1931. "s_code": v,
  1932. "i_state": map[string]interface{}{
  1933. "$in": []int{2, 5},
  1934. },
  1935. }
  1936. }
  1937. task, _ := u.MgoEB.Find("task", query, nil, nil, false, -1, -1)
  1938. if task != nil {
  1939. for _, t := range *task {
  1940. idArr = append(idArr, mongodb.BsonIdToSId(t["_id"]))
  1941. }
  1942. }
  1943. return idArr
  1944. }
  1945. }
  1946. return idArr
  1947. }