|
@@ -57,6 +57,7 @@ type Front struct {
|
|
|
loadModel xweb.Mapper `xweb:"/center/gmodel/(.*)"` //加载模型
|
|
|
importdata xweb.Mapper `xweb:"/center/importdata"` //导入爬虫列表页面
|
|
|
importLua xweb.Mapper `xweb:"/center/importlua"` //导入爬虫
|
|
|
+ importAi xweb.Mapper `xweb:"/center/importai"` //导入ai爬虫
|
|
|
oldedit xweb.Mapper `xweb:"/center/oldedit"` //老文件编辑
|
|
|
findName xweb.Mapper `xweb:"/center/findname"` //即时查询名称
|
|
|
checkrepeat xweb.Mapper `xweb:"/center/spider/isrepeat"` //脚本代码判重
|
|
@@ -320,12 +321,7 @@ func (f *Front) ImportLua() {
|
|
|
if cells[1].Value != "" {
|
|
|
code := cells[1].Value
|
|
|
code = u.SymbolReg.ReplaceAllString(code, "")
|
|
|
- query := map[string]interface{}{"code": cells[1].Value}
|
|
|
- rs, _ := u.MgoEB.FindOne("import", query)
|
|
|
- if len(*rs) > 0 {
|
|
|
- errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫代码重复,请修改"
|
|
|
- continue
|
|
|
- }
|
|
|
+ code = u.CheckCode(code)
|
|
|
o := make(map[string]interface{})
|
|
|
o["name"] = u.SymbolReg.ReplaceAllString(cells[0].Value, "")
|
|
|
o["code"] = u.SymbolReg.ReplaceAllString(code, "")
|
|
@@ -626,6 +622,282 @@ func saveLua(o map[string]interface{}) bool {
|
|
|
return ok
|
|
|
}
|
|
|
|
|
|
+func (f *Front) ImportAi() {
|
|
|
+ auth := qu.IntAll(f.GetSession("auth"))
|
|
|
+ if auth != u.Role_Admin {
|
|
|
+ f.ServeJson("没有权限")
|
|
|
+ return
|
|
|
+ }
|
|
|
+ if f.Method() == "POST" {
|
|
|
+ mf, _, err := f.GetFile("xlsx")
|
|
|
+ errorinfo := map[string]interface{}{}
|
|
|
+ if err == nil {
|
|
|
+ binary, _ := ioutil.ReadAll(mf)
|
|
|
+ xls, _ := xlsx.OpenBinary(binary)
|
|
|
+ sheet := xls.Sheets[0]
|
|
|
+ rows := sheet.Rows
|
|
|
+ for k, v := range rows {
|
|
|
+ if k != 0 {
|
|
|
+ cells := v.Cells
|
|
|
+ if cells[1].Value != "" {
|
|
|
+ code := cells[1].Value
|
|
|
+ code = u.SymbolReg.ReplaceAllString(code, "")
|
|
|
+ code = u.CheckCode(code)
|
|
|
+ o := make(map[string]interface{})
|
|
|
+ o["name"] = u.SymbolReg.ReplaceAllString(cells[0].Value, "")
|
|
|
+ o["code"] = u.SymbolReg.ReplaceAllString(code, "")
|
|
|
+ o["channel"] = cells[2].Value
|
|
|
+ spiderremark := cells[3].Value
|
|
|
+ if spiderremark == "" {
|
|
|
+ spiderremark = `采集“` + cells[2].Value + `”栏目(含子栏目)`
|
|
|
+ }
|
|
|
+ o["spiderremark"] = spiderremark
|
|
|
+ //重复域名的网站不再新增爬虫
|
|
|
+ href := cells[4].Value
|
|
|
+ o["channeladdr"] = href
|
|
|
+ o["timestamp"] = time.Now().Unix()
|
|
|
+ o["modifyuser"] = cells[5].Value
|
|
|
+ o["event"] = cells[6].Value
|
|
|
+ o["incrementevent"] = cells[7].Value
|
|
|
+ if cells[8].Value == "是" {
|
|
|
+ o["isflow"] = 1
|
|
|
+ } else {
|
|
|
+ o["isflow"] = 0
|
|
|
+ }
|
|
|
+ o["priority"], _ = cells[9].Int()
|
|
|
+ o["platform"] = cells[10].Value
|
|
|
+ o["area"] = cells[11].Value
|
|
|
+ o["city"] = cells[12].Value
|
|
|
+ o["district"] = cells[13].Value
|
|
|
+ weigh, _ := cells[14].Int()
|
|
|
+ if weigh == -1 {
|
|
|
+ weigh = 1
|
|
|
+ }
|
|
|
+ o["weight"] = weigh
|
|
|
+ //存储表
|
|
|
+ coll := cells[15].Value
|
|
|
+ if coll == "" {
|
|
|
+ coll = "bidding"
|
|
|
+ }
|
|
|
+ //爬虫类型
|
|
|
+ infoformat, _ := cells[16].Int()
|
|
|
+ if infoformat < 1 {
|
|
|
+ errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",缺少爬虫类型信息"
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ o["infoformat"] = infoformat
|
|
|
+ if strings.Contains(code, "bidding") {
|
|
|
+ errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",存储表错误"
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ o["coll"] = coll
|
|
|
+ ok := saveLuaAi(o) //保存爬虫
|
|
|
+ if ok == false {
|
|
|
+ errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行找不到作者,已经过滤"
|
|
|
+ } else {
|
|
|
+ o["status"] = 1
|
|
|
+ o["author"] = o["modifyuser"]
|
|
|
+ o["importuser"] = f.GetSession("username")
|
|
|
+ u.MgoEB.Save("import", o)
|
|
|
+ u.CommCodesCache <- map[string]string{
|
|
|
+ "code": code,
|
|
|
+ "href": href,
|
|
|
+ "site": qu.ObjToString(o["name"]),
|
|
|
+ "channel": qu.ObjToString(o["channel"]),
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ f.ServeJson(errorinfo)
|
|
|
+ } else {
|
|
|
+ f.ServeJson(false)
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+func saveLuaAi(o map[string]interface{}) bool {
|
|
|
+ //基础字段
|
|
|
+ param := map[string]interface{}{
|
|
|
+ "recovertime": int64(0), //回收时间
|
|
|
+ "claimtime": int64(0), //认领时间
|
|
|
+ "claimtype": CLAIMTYPEUNCLAIMED, //爬虫认领状态(未认领)
|
|
|
+ "priority": o["priority"], //优先级
|
|
|
+ "spidercompete": true, //2021-11-20后爬虫加此字段(表示新爬虫,剑鱼网站不展示原文)
|
|
|
+ "spiderhistorymaxpage": 1, //历史最大页
|
|
|
+ "pendstate": 0, //
|
|
|
+ "grade": 0, //爬虫难易度(主要用于python爬虫使用)
|
|
|
+ "spiderimportant": false, //是否为重点网站爬虫
|
|
|
+ "urgency": 0,
|
|
|
+ "incrementevent": qu.IntAll(o["incrementevent"]),
|
|
|
+ "platform": o["platform"],
|
|
|
+ "weight": o["weight"],
|
|
|
+ "infoformat": o["infoformat"],
|
|
|
+ //"urgency": o["urgency"],
|
|
|
+ "isflow": o["isflow"],
|
|
|
+ "spidertype": "history",
|
|
|
+ "spiderremark": o["spiderremark"],
|
|
|
+ "omnipotent": false,
|
|
|
+ }
|
|
|
+ AutoTpl["Base.SpiderName"] = o["name"]
|
|
|
+ AutoTpl["Base.SpiderCode"] = o["code"]
|
|
|
+ AutoTpl["Base.SpiderChannel"] = o["channel"]
|
|
|
+ AutoTpl["Base.SpiderTargetChannelUrl"] = o["channeladdr"]
|
|
|
+ modifyuser := o["modifyuser"].(string)
|
|
|
+ var userid, email string
|
|
|
+ claimLog := map[string]interface{}{}
|
|
|
+ common := []interface{}{
|
|
|
+ AutoTpl["Base.SpiderCode"],
|
|
|
+ AutoTpl["Base.SpiderName"],
|
|
|
+ AutoTpl["Base.SpiderChannel"],
|
|
|
+ AutoTpl["Base.SpiderDownDetailPage"],
|
|
|
+ AutoTpl["Base.SpiderStartPage"],
|
|
|
+ AutoTpl["Base.SpiderMaxPage"],
|
|
|
+ AutoTpl["Base.SpiderRunRate"],
|
|
|
+ //AutoTpl["Base.Spider2Collection"],
|
|
|
+ //"bidding", //爬虫导入新建默认为bidding
|
|
|
+ qu.ObjToString(o["coll"]),
|
|
|
+ AutoTpl["Base.SpiderPageEncoding"],
|
|
|
+ AutoTpl["Base.SpiderStoreMode"],
|
|
|
+ AutoTpl["Base.SpiderStoreToMsgEvent"],
|
|
|
+ AutoTpl["Base.SpiderTargetChannelUrl"],
|
|
|
+ AutoTpl["Base.SpiderLastDownloadTime"],
|
|
|
+ AutoTpl["Base.SpiderIsHistoricalMend"],
|
|
|
+ AutoTpl["Base.SpiderIsMustDownload"],
|
|
|
+ }
|
|
|
+ ptime := []interface{}{
|
|
|
+ AutoTpl["Step1.DateFormat"],
|
|
|
+ AutoTpl["Step1.Address"],
|
|
|
+ AutoTpl["Step1.ContentChooser"],
|
|
|
+ }
|
|
|
+ list := []interface{}{
|
|
|
+ AutoTpl["Step2.Listadd"],
|
|
|
+ AutoTpl["Step2.Listadds"],
|
|
|
+ AutoTpl["Step2.BlockChooser"],
|
|
|
+ AutoTpl["Step2.AddressChooser"],
|
|
|
+ AutoTpl["Step2.TitleChooser"],
|
|
|
+ AutoTpl["Step2.DateChooser"],
|
|
|
+ AutoTpl["Step2.DateFormat"],
|
|
|
+ }
|
|
|
+ content := []interface{}{
|
|
|
+ AutoTpl["Step3.ContentChooser"],
|
|
|
+ AutoTpl["Step3.ElementChooser"],
|
|
|
+ }
|
|
|
+ param["param_common"] = common
|
|
|
+ //向导模式
|
|
|
+ param["param_time"] = ptime
|
|
|
+ param["param_list"] = list
|
|
|
+ param["param_content"] = content
|
|
|
+ param["type_time"] = 0
|
|
|
+ param["type_list"] = 0
|
|
|
+ param["type_content"] = 0
|
|
|
+ //专家模式
|
|
|
+ param["str_time"] = ""
|
|
|
+ param["str_list"] = ""
|
|
|
+ param["str_content"] = ""
|
|
|
+ param["comeintime"] = time.Now().Unix()
|
|
|
+ param["code"] = o["code"]
|
|
|
+ param["site"] = o["name"]
|
|
|
+ param["href"] = o["channeladdr"]
|
|
|
+ param["channel"] = o["channel"]
|
|
|
+ param["createuser"] = modifyuser
|
|
|
+ param["createuserid"] = userid
|
|
|
+ param["createuseremail"] = email
|
|
|
+ param["modifyuser"] = modifyuser
|
|
|
+ param["modifyuserid"] = userid
|
|
|
+ param["modifytime"] = time.Now().Unix()
|
|
|
+ param["state"] = 0 //未完成
|
|
|
+ if qu.IntAll(o["event"]) > 0 {
|
|
|
+ param["event"] = qu.IntAll(o["event"])
|
|
|
+ }
|
|
|
+ s_model := "bid"
|
|
|
+ configModel := util.Config.Model[s_model]
|
|
|
+ model := map[string]interface{}{}
|
|
|
+ for k, _ := range configModel {
|
|
|
+ model[k] = qu.ObjToString(o[k])
|
|
|
+ }
|
|
|
+ param["model"] = model
|
|
|
+ param["next"] = email
|
|
|
+ incrementevent := qu.ObjToString(o["incrementevent"])
|
|
|
+ if movevent, ok := util.Config.Uploadevents[incrementevent].(string); ok && movevent != "" {
|
|
|
+ param["spidermovevent"] = movevent
|
|
|
+ }
|
|
|
+ infoformat := qu.IntAll(o["infoformat"])
|
|
|
+ infotype := "招标"
|
|
|
+ if infoformat == 2 {
|
|
|
+ infotype = "拟建/审批"
|
|
|
+ } else if infoformat == 3 {
|
|
|
+ infotype = "产权"
|
|
|
+ } else if infoformat == 4 {
|
|
|
+ infotype = "舆情"
|
|
|
+ }
|
|
|
+ ok := spider.SaveSpider(o["code"].(string), param) //爬虫保存
|
|
|
+ if ok { //保存成功,校验新导入的爬虫对应站点是否存在,否则加站点记录
|
|
|
+ site, _ := u.MgoEB.FindOneByField("site", map[string]interface{}{"site": o["name"]}, map[string]interface{}{"important": 1})
|
|
|
+ if len(*site) == 0 {
|
|
|
+ qu.Debug("补充站点信息:", o["name"])
|
|
|
+ domain := u.DomainReg.FindString(qu.ObjToString(AutoTpl["Base.SpiderTargetChannelUrl"]))
|
|
|
+ if domain != "" {
|
|
|
+ domain = u.ReplaceReg.ReplaceAllString(domain, "")
|
|
|
+ }
|
|
|
+ siteInfo := map[string]interface{}{
|
|
|
+ "site": o["name"],
|
|
|
+ "domain": domain, //
|
|
|
+ "another_name": "",
|
|
|
+ "area": qu.ObjToString(model["area"]),
|
|
|
+ "city": qu.ObjToString(model["city"]),
|
|
|
+ "district": qu.ObjToString(model["district"]),
|
|
|
+ "site_type": "",
|
|
|
+ "second_type": "",
|
|
|
+ "industry": "",
|
|
|
+ "p_site": "",
|
|
|
+ "s_site": "",
|
|
|
+ "remarktime": time.Now().Unix(),
|
|
|
+ "event": incrementevent,
|
|
|
+ "platform": o["platform"],
|
|
|
+ "spider_status": "0/1",
|
|
|
+ "updatetime": time.Now().Unix(),
|
|
|
+ "delete": false,
|
|
|
+ "comeintime": time.Now().Unix(),
|
|
|
+ "important": 0,
|
|
|
+ "site_status": 1,
|
|
|
+ "lasttime": int64(0),
|
|
|
+ "site_datanum": 0,
|
|
|
+ "period": float32(0),
|
|
|
+ "infotype": infotype,
|
|
|
+ "sponsor": "",
|
|
|
+ "isneedregister": 0,
|
|
|
+ "isregistered": 0,
|
|
|
+ "special_type": "",
|
|
|
+ "account": "",
|
|
|
+ "password": "",
|
|
|
+ "f_area": "",
|
|
|
+ "f_city": "",
|
|
|
+ "f_district": "",
|
|
|
+ "site_subtype": "",
|
|
|
+ "site_toptype": "",
|
|
|
+ "type_plate": "",
|
|
|
+ }
|
|
|
+ u.MgoEB.Save("site", siteInfo)
|
|
|
+ } else if qu.IntAll((*site)["important"]) == 1 { //重点网站
|
|
|
+ u.MgoEB.Update("luaconfig", map[string]interface{}{"code": o["code"]}, map[string]interface{}{"$set": map[string]interface{}{"spiderimportant": true}}, false, false)
|
|
|
+ u.MgoEB.Update("site_code_baseinfo", map[string]interface{}{"spidercode": o["code"]}, map[string]interface{}{"$set": map[string]interface{}{
|
|
|
+ "site": o["name"],
|
|
|
+ "channel": o["channel"],
|
|
|
+ "spidercode": o["code"],
|
|
|
+ "platform": o["platform"],
|
|
|
+ "modifyuser": "",
|
|
|
+ "state": 0,
|
|
|
+ }}, true, false)
|
|
|
+ }
|
|
|
+ //生成认领日志
|
|
|
+ if len(claimLog) > 0 {
|
|
|
+ u.MgoEB.Save("lua_logs_claim", claimLog)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return ok
|
|
|
+}
|
|
|
+
|
|
|
func (f *Front) Importdata() {
|
|
|
auth := qu.IntAll(f.GetSession("auth"))
|
|
|
if auth == u.Role_Admin {
|