@@ -97,12 +97,12 @@ type Front struct {
claimCode xweb.Mapper `xweb:"/center/claim/claimcode"` // claim a spider
returnCode xweb.Mapper `xweb:"/center/claim/returncode"` // return a spider
- // generic spiders
- commCodesCenter xweb.Mapper `xweb:"/center/commMonitor"` // generic-spider management center
+ // visual spiders
+ commCodesCenter xweb.Mapper `xweb:"/center/commMonitor"` // visual-spider management center
updateCodeBase xweb.Mapper `xweb:"/center/commspider/updatecodebase"` //
}

-const Sp_state_0, Sp_state_1, Sp_state_2, Sp_state_3, Sp_state_4, Sp_state_5, Sp_state_6, Sp_state_7, Sp_state_8, Sp_state_9, Sp_state_10 = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 // 0 pending, 1 awaiting review, 2 rejected, 3 published, 4 voided, 5 online, 6 offline, 7 not released, 8 login required, 9 moved to python, 10 deleted
+//const Sp_state_0, Sp_state_1, Sp_state_2, Sp_state_3, Sp_state_4, Sp_state_5, Sp_state_6, Sp_state_7, Sp_state_8, Sp_state_9, Sp_state_10 = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 // 0 pending, 1 awaiting review, 2 rejected, 3 published, 4 voided, 5 online, 6 offline, 7 not released, 8 login required, 9 moved to python, 10 deleted

var spinfos sync.Map = sync.Map{}

var SessMap map[string]*httpsession.Session
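The local `Sp_state_*` constants are commented out here, and the call sites below now reference `u.Sp_state_1` and `u.Sp_state_6`, so the constants have presumably been relocated into the shared `u` package. A minimal sketch of what that relocation would look like (the exact package layout is an assumption):

```go
// Package u (shared utilities): hypothetical new home of the state constants,
// so that every package can reference them as u.Sp_state_N.
package u

// Spider lifecycle states.
const (
	Sp_state_0  = 0  // pending
	Sp_state_1  = 1  // awaiting review
	Sp_state_2  = 2  // rejected
	Sp_state_3  = 3  // published
	Sp_state_4  = 4  // voided
	Sp_state_5  = 5  // online
	Sp_state_6  = 6  // offline
	Sp_state_7  = 7  // not released
	Sp_state_8  = 8  // login required
	Sp_state_9  = 9  // moved to python
	Sp_state_10 = 10 // deleted
)
```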
@@ -193,7 +193,7 @@ func (f *Front) LoadIndex() {
if state > -1 {
query["state"] = state
} else if auth == u.Role_Examine {
- query["state"] = Sp_state_1
+ query["state"] = u.Sp_state_1
}
}
sort := `{"%s":%d}`
@@ -341,7 +341,7 @@ func (f *Front) ImportLua() {
href := cells[4].Value
one, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"href": href})
if len(*one) > 0 {
- errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",该栏目已存在,请确认"
+ errorinfo[code] = "第" + strconv.Itoa(k) + "行爬虫" + code + ",该栏目已存在,请确认"
//continue
}
o["channeladdr"] = href
@@ -372,13 +372,13 @@ func (f *Front) ImportLua() {
// spider type
infoformat, _ := cells[16].Int()
if infoformat < 1 {
- errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",缺少爬虫类型信息"
+ errorinfo[code] = "第" + strconv.Itoa(k) + "行爬虫" + code + ",缺少爬虫类型信息"
continue
}
o["infoformat"] = infoformat

if strings.Contains(code, "bidding") {
- errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",存储表错误"
+ errorinfo[code] = "第" + strconv.Itoa(k) + "行爬虫" + code + ",存储表错误"
continue
}
o["coll"] = coll
@@ -387,7 +387,7 @@ func (f *Front) ImportLua() {
//o["transfercode"] = qu.IntAll(Transfercode[table])
ok := saveLua(o) // save the spider
if ok == false {
- errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行找不到作者,已经过滤"
+ errorinfo[code] = "第" + strconv.Itoa(k) + "行找不到作者,已经过滤"
} else {
o["status"] = 1
o["author"] = o["modifyuser"]
@@ -416,6 +416,8 @@ func saveLua(o map[string]interface{}) bool {
platform := qu.ObjToString(o["platform"])
var userid, email string
claimLog := map[string]interface{}{}
+ now := time.Now().Unix()
+ expiretime := now + 5*86400 // expires five days from now
if modifyuser != "" { // maintainer specified
one, _ := u.MgoEB.FindOne("user", map[string]interface{}{"s_name": modifyuser, "i_auth": 1, "i_delete": 0})
if len(*one) == 0 {
@@ -423,37 +425,33 @@ func saveLua(o map[string]interface{}) bool {
}
email = qu.ObjToString((*one)["s_email"])
userid = mongodb.BsonIdToSId((*one)["_id"])
- if platform != "python" {
- now := time.Now().Unix()
- recovertime := now + 365*86400
- param["claimtime"] = now // claim time
- param["claimtype"] = CLAIMTYPECLAIMED
- param["recovertime"] = recovertime
- claimLog = map[string]interface{}{
- "site": o["name"],
- "code": o["code"],
- "channel": o["channel"],
- "modifyuser": modifyuser,
- "priority": priority,
- "stype": "认领",
- "comeintime": now,
- "claimtime": now,
- "recovertime": recovertime,
- "returntime": int64(0),
- "important": false,
- "returnreason": "",
- "claimrecovertype": 0,
- "source": "爬虫指定维护人导入",
- }
- } else {
- param["recovertime"] = int64(0) // recovery time
- param["claimtime"] = int64(0) // claim time
- param["claimtype"] = CLAIMTYPEHISTORY // spider claim status (historical spider)
+ recovertime := u.GetRecoverTime(now, platform)
+ param["claimtime"] = now // claim time
+ param["claimtype"] = CLAIMTYPECLAIMED // claim status
+ param["recovertime"] = recovertime // recovery time
+ param["expiretime"] = expiretime // expiry time
+ claimLog = map[string]interface{}{
+ "site": o["name"],
+ "code": o["code"],
+ "channel": o["channel"],
+ "modifyuser": modifyuser,
+ "priority": priority,
+ "stype": "认领",
+ "comeintime": now,
+ "claimtime": now,
+ "recovertime": recovertime,
+ "expiretime": expiretime,
+ "returntime": int64(0),
+ "important": false,
+ "returnreason": "",
+ "claimrecovertype": 0,
+ "source": "爬虫指定维护人导入",
+ }
} else { // no maintainer specified
- param["recovertime"] = int64(0) // recovery time
param["claimtime"] = int64(0) // claim time
param["claimtype"] = CLAIMTYPEUNCLAIMED // spider claim status (unclaimed)
+ param["recovertime"] = int64(0) // recovery time
+ param["expiretime"] = expiretime // expiry time
}
common := []interface{}{
AutoTpl["Base.SpiderCode"],
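The inline one-year recover-time arithmetic and the python special case collapse into a single call to `u.GetRecoverTime(now, platform)`, whose implementation is not part of this diff. A minimal sketch of such a helper, assuming it keeps the old 365-day window for the default case (the per-platform windows here are illustrative, not the real values):

```go
// GetRecoverTime returns the timestamp at which a claimed spider is taken
// back from its maintainer. Hypothetical sketch: only the call site appears
// in the diff, so the branch values are assumptions.
func GetRecoverTime(now int64, platform string) int64 {
	const day = int64(86400)
	switch platform {
	case "python":
		return now + 30*day // assumption: a shorter window for python spiders
	default:
		return now + 365*day // matches the old inline one-year window
	}
}
```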
@@ -560,6 +558,8 @@ func saveLua(o map[string]interface{}) bool {
param["grade"] = 0 // spider difficulty (mainly used by python spiders)
param["spiderimportant"] = false // whether this is a key-site spider
param["urgency"] = 0
+ param["autotype"] = 0 // automated-collection type (1: generic; 2: whole-site)
+ param["allowclaim"] = true // may be claimed for development by other platforms
//qu.Debug("param---", param)

ok := spider.SaveSpider(o["code"].(string), param)
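The new `autotype` field is stored as a bare int whose meaning lives only in the comment. For readability, named values could document it at the call sites; a hypothetical sketch (these identifiers do not exist in the codebase):

```go
// Hypothetical named values for the autotype field; the diff writes raw ints.
const (
	AutoTypeNone      = 0 // not automated
	AutoTypeGeneric   = 1 // generic (通用) collection
	AutoTypeWholeSite = 2 // whole-site (整站) collection
)
```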
@@ -672,9 +672,11 @@ func (f *Front) ImportAi() {
code = u.SymbolReg.ReplaceAllString(code, "")
code = u.CheckCode(code)
o := make(map[string]interface{})
- o["name"] = u.SymbolReg.ReplaceAllString(cells[0].Value, "")
+ site := u.SymbolReg.ReplaceAllString(cells[0].Value, "")
+ o["name"] = site
o["code"] = u.SymbolReg.ReplaceAllString(code, "")
- o["channel"] = cells[2].Value
+ channel := cells[2].Value
+ o["channel"] = channel
spiderremark := cells[3].Value
if spiderremark == "" {
spiderremark = `采集“` + cells[2].Value + `”栏目(含子栏目)`
@@ -710,34 +712,45 @@ func (f *Front) ImportAi() {
// spider type
infoformat, _ := cells[16].Int()
if infoformat < 1 {
- errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",缺少爬虫类型信息"
+ errorinfo[code] = "第" + strconv.Itoa(k) + "行爬虫" + code + ",缺少爬虫类型信息"
continue
}
o["infoformat"] = infoformat
if strings.Contains(code, "bidding") {
- errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",存储表错误"
+ errorinfo[code] = "第" + strconv.Itoa(k) + "行爬虫" + code + ",存储表错误"
continue
}
o["coll"] = coll
ok := saveLuaAi(o) // save the spider
if ok == false {
- errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行找不到作者,已经过滤"
+ errorinfo[code] = "第" + strconv.Itoa(k) + "行找不到作者,已经过滤"
} else {
o["status"] = 1
o["author"] = o["modifyuser"]
o["importuser"] = f.GetSession("username")
u.MgoEB.Save("import", o)
- stype := "bid"
- if infoformat == 5 {
- stype = "news"
- }
- commutil.CommCodesCache <- map[string]interface{}{
- "code": code,
- "href": href,
- "site": o["name"],
- "channel": o["channel"],
- "stype": stype,
+ // AI generic validation
+ err := commutil.AiCheckTaskPub(site, channel, code, href, coll, infoformat, true)
+ if err != nil {
+ errorinfo[code] = "第" + strconv.Itoa(k) + "行爬虫" + code + ",任务发布失败,请联系管理员!"
+ qu.Debug("任务发布失败:", code, err)
+ } else {
+ qu.Debug("worker任务发布成功:", code)
}
+ //stype := "bid"
+ //if infoformat == 5 {
+ // stype = "news"
+ //}
+ //commutil.CommCodesCache <- map[string]interface{}{
+ // "code": code,
+ // "href": href,
+ // "site": o["name"],
+ // "channel": o["channel"],
+ // "stype": stype,
+ // "infoformat": infoformat,
+ // "coll": coll,
+ // "spidercompete": true,
+ //}
}
}
}
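The fire-and-forget push into the `commutil.CommCodesCache` channel is replaced by a synchronous call to `commutil.AiCheckTaskPub`, so publish failures can be surfaced per row in `errorinfo`. Only the call site appears in this diff; the signature below is inferred from it and the body is an illustrative sketch, not the real implementation:

```go
package commutil

// AiCheckTaskPub validates an AI-imported spider and publishes its worker
// task. Sketch only: signature inferred from the ImportAi call site.
func AiCheckTaskPub(site, channel, code, href, coll string, infoformat int, spidercompete bool) error {
	stype := "bid"
	if infoformat == 5 { // 5 marked news-type spiders at the old call site
		stype = "news"
	}
	task := map[string]interface{}{
		"site": site, "channel": channel, "code": code, "href": href,
		"coll": coll, "stype": stype, "infoformat": infoformat,
		"spidercompete": spidercompete,
	}
	return publishWorkerTask(task)
}

// publishWorkerTask is a hypothetical stand-in for whatever actually hands
// the task to the worker queue.
func publishWorkerTask(task map[string]interface{}) error { return nil }
```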
@@ -771,14 +784,15 @@ func saveLuaAi(o map[string]interface{}) bool {
"spidertype": "history",
"spiderremark": o["spiderremark"],
"omnipotent": false,
+ "autotype": 0, // automated-collection type (1: generic; 2: whole-site)
+ "allowclaim": true,
}
AutoTpl["Base.SpiderName"] = o["name"]
AutoTpl["Base.SpiderCode"] = o["code"]
AutoTpl["Base.SpiderChannel"] = o["channel"]
AutoTpl["Base.SpiderTargetChannelUrl"] = o["channeladdr"]
- modifyuser := o["modifyuser"].(string)
- var userid, email string
- claimLog := map[string]interface{}{}
+ //modifyuser := o["modifyuser"].(string)
+ //var userid, email string
common := []interface{}{
AutoTpl["Base.SpiderCode"],
AutoTpl["Base.SpiderName"],
@@ -833,11 +847,11 @@ func saveLuaAi(o map[string]interface{}) bool {
param["site"] = o["name"]
param["href"] = o["channeladdr"]
param["channel"] = o["channel"]
- param["createuser"] = modifyuser
- param["createuserid"] = userid
- param["createuseremail"] = email
- param["modifyuser"] = modifyuser
- param["modifyuserid"] = userid
+ param["createuser"] = u.AI_USER
+ param["createuserid"] = u.AI_USERID
+ param["createuseremail"] = u.AI_USEREMAIL
+ param["modifyuser"] = u.AI_USER
+ param["modifyuserid"] = u.AI_USERID
param["modifytime"] = time.Now().Unix()
param["state"] = 0 // not finished
if qu.IntAll(o["event"]) > 0 {
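AI-imported spiders are now attributed to a fixed system account instead of the per-row maintainer. The `u.AI_USER*` values are referenced but not defined in this diff; presumably they are package-level constants along these lines (the literal values are placeholders):

```go
// Hypothetical definitions in package u; the actual values are not shown
// in this diff.
const (
	AI_USER      = "ai_import"                // display name of the AI system account
	AI_USERID    = "000000000000000000000000" // placeholder Mongo ObjectId string
	AI_USEREMAIL = "ai@example.com"           // placeholder notification address
)
```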
@@ -850,7 +864,7 @@ func saveLuaAi(o map[string]interface{}) bool {
model[k] = qu.ObjToString(o[k])
}
param["model"] = model
- param["next"] = email
+ param["next"] = u.AI_USEREMAIL
incrementevent := qu.ObjToString(o["incrementevent"])
if movevent, ok := util.Config.Uploadevents[incrementevent].(string); ok && movevent != "" {
param["spidermovevent"] = movevent
@@ -931,10 +945,6 @@ func saveLuaAi(o map[string]interface{}) bool {
}}, true, false)
}
}
- // write the claim log
- if len(claimLog) > 0 {
- u.MgoEB.Save("lua_logs_claim", claimLog)
- }
// basic spider attributes
u.MgoEB.Save("luaconfig_cfg", map[string]interface{}{
"site_toptype": site_toptype,
@@ -1160,7 +1170,7 @@ func (f *Front) SpiderUpdatePlatform() {
}
set := map[string]interface{}{}
set["platform"] = platform
- if platform == "python" && platform == "jschrome" {
+ if platform != "golua平台" && platform != "chrome" {
var err error
var b bool
b = u.MgoS.Update("spider_heart", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"del": true}}, false, true)
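The removed condition `platform == "python" && platform == "jschrome"` can never be true, since one string cannot equal two different values at once, so the heart-cleanup branch was dead code. The rewrite inverts the test into "any platform that is not golua/chrome". A minimal illustration of the difference:

```go
package main

import "fmt"

func main() {
	platform := "python"
	// The old test: always false, regardless of input.
	fmt.Println(platform == "python" && platform == "jschrome") // false
	// The fixed test: true for every non-golua platform.
	fmt.Println(platform != "golua平台" && platform != "chrome") // true
}
```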
@@ -1405,13 +1415,12 @@ func (f *Front) UpdateESP() {
} else if w == "platform" {
// regular field update
set["platform"] = val
- //set["comeintime"] = time.Now().Unix()
- if val == "python" && val == "jschrome" {
+ if val != "golua平台" && val != "chrome" {
b := u.MgoS.Update("spider_heart", map[string]interface{}{"code": code}, map[string]interface{}{"$set": map[string]interface{}{"del": true}}, false, true)
qu.Debug("Del Heart:", b)
var err error
pf := qu.ObjToString((*one)["platform"])
- if pf == "golua平台" || pf == "chrome" { // spider belongs to the golua platform
+ if pf == "golua平台" || pf == "chrome" { // the spider's previous platform was golua, so take it offline
b, err = spider.UpdateSpiderByCodeState(code, "6", qu.IntAll((*one)["event"])) // take offline
if b && err == nil {
// taking the history node offline avoids this case: an online spider, while pending, is switched to event 7000 for historical collection and then moved to another platform, so the spider on the original online node is never taken down and its heartbeat turns abnormal
@@ -1464,7 +1473,7 @@ func (f *Front) UpdateESP() {
set["event"] = event
set["incrementevent"] = event
set["state"] = 6
- b, err := UpStateAndUpSpider(code, "", "", "", Sp_state_6) // take the online spider offline
+ b, err := UpStateAndUpSpider(code, "", "", "", u.Sp_state_6) // take the online spider offline
qu.Debug("爬虫下架成功:", b)
if !b || err != nil {
f.Write("n")