Selaa lähdekoodia

新增万能爬虫检测流程

mxs 3 kuukautta sitten
vanhempi
commit
1a82879f60

+ 2 - 1
src/config.json

@@ -14,7 +14,7 @@
     "udport": 1499,
     "udpaddr": "127.0.0.1",
     "localudport": ":1498",
-    "redisservers": "title_repeat_judgement=192.168.3.18:2379",
+    "redisservers": "title_repeat_judgement=172.20.45.130:1579",
     "msgservers": {
         "comm": {
             "addr": "spdata.jianyu360.com:801",
@@ -105,6 +105,7 @@
     "renderaddr": "http://192.168.3.182:8050/render.json",
     "proxyaddr": "http://cc.spdata.jianyu360.com/crawl/proxy/socks5/fetch",
     "proxyauthor": "Basic amlhbnl1MDAxOjEyM3F3ZSFB",
+    "optcodeserver": "http://127.0.0.1:7081/check",
     "luadisablelib": {
         "baselib": {
             "print": false

+ 278 - 6
src/front/front.go

@@ -57,6 +57,7 @@ type Front struct {
 	loadModel            xweb.Mapper `xweb:"/center/gmodel/(.*)"`           //加载模型
 	importdata           xweb.Mapper `xweb:"/center/importdata"`            //导入爬虫列表页面
 	importLua            xweb.Mapper `xweb:"/center/importlua"`             //导入爬虫
+	importAi             xweb.Mapper `xweb:"/center/importai"`              //导入ai爬虫
 	oldedit              xweb.Mapper `xweb:"/center/oldedit"`               //老文件编辑
 	findName             xweb.Mapper `xweb:"/center/findname"`              //即时查询名称
 	checkrepeat          xweb.Mapper `xweb:"/center/spider/isrepeat"`       //脚本代码判重
@@ -320,12 +321,7 @@ func (f *Front) ImportLua() {
 					if cells[1].Value != "" {
 						code := cells[1].Value
 						code = u.SymbolReg.ReplaceAllString(code, "")
-						query := map[string]interface{}{"code": cells[1].Value}
-						rs, _ := u.MgoEB.FindOne("import", query)
-						if len(*rs) > 0 {
-							errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫代码重复,请修改"
-							continue
-						}
+						code = u.CheckCode(code)
 						o := make(map[string]interface{})
 						o["name"] = u.SymbolReg.ReplaceAllString(cells[0].Value, "")
 						o["code"] = u.SymbolReg.ReplaceAllString(code, "")
@@ -626,6 +622,282 @@ func saveLua(o map[string]interface{}) bool {
 	return ok
 }
 
+// ImportAi handles /center/importai: bulk-imports AI-generated spiders from
+// an uploaded .xlsx file (one spider per row, row 0 is the header).
+// Admin-only. Responds with a map of per-row error messages (empty map on
+// full success), or false when the upload itself fails.
+func (f *Front) ImportAi() {
+	auth := qu.IntAll(f.GetSession("auth"))
+	if auth != u.Role_Admin {
+		f.ServeJson("没有权限")
+		return
+	}
+	if f.Method() == "POST" {
+		mf, _, err := f.GetFile("xlsx")
+		errorinfo := map[string]interface{}{}
+		if err == nil {
+			binary, _ := ioutil.ReadAll(mf)
+			xls, _ := xlsx.OpenBinary(binary)
+			sheet := xls.Sheets[0]
+			rows := sheet.Rows
+			for k, v := range rows {
+				if k != 0 { // skip header row
+					cells := v.Cells
+					if cells[1].Value != "" { // column 1: spider code
+						code := cells[1].Value
+						code = u.SymbolReg.ReplaceAllString(code, "")
+						// De-duplicate against existing codes (may append a _NN suffix).
+						code = u.CheckCode(code)
+						o := make(map[string]interface{})
+						o["name"] = u.SymbolReg.ReplaceAllString(cells[0].Value, "")
+						o["code"] = u.SymbolReg.ReplaceAllString(code, "")
+						o["channel"] = cells[2].Value
+						spiderremark := cells[3].Value
+						if spiderremark == "" {
+							// Default remark: crawl "<channel>" (including sub-channels).
+							spiderremark = `采集“` + cells[2].Value + `”栏目(含子栏目)`
+						}
+						o["spiderremark"] = spiderremark
+						// Sites with a duplicated domain no longer get a new spider.
+						href := cells[4].Value
+						o["channeladdr"] = href
+						o["timestamp"] = time.Now().Unix()
+						o["modifyuser"] = cells[5].Value
+						o["event"] = cells[6].Value
+						o["incrementevent"] = cells[7].Value
+						if cells[8].Value == "是" { // "yes" marks flow-type data
+							o["isflow"] = 1
+						} else {
+							o["isflow"] = 0
+						}
+						o["priority"], _ = cells[9].Int()
+						o["platform"] = cells[10].Value
+						o["area"] = cells[11].Value
+						o["city"] = cells[12].Value
+						o["district"] = cells[13].Value
+						weigh, _ := cells[14].Int()
+						if weigh == -1 { // -1 presumably means an empty/non-numeric cell — confirm Cell.Int semantics
+							weigh = 1
+						}
+						o["weight"] = weigh
+						// Target storage collection; defaults to "bidding".
+						coll := cells[15].Value
+						if coll == "" {
+							coll = "bidding"
+						}
+						// Spider type (infoformat); must be >= 1.
+						infoformat, _ := cells[16].Int()
+						if infoformat < 1 {
+							errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",缺少爬虫类型信息"
+							continue
+						}
+						o["infoformat"] = infoformat
+						// NOTE(review): this inspects the spider *code* for "bidding",
+						// but the error text says "storage table error" — presumably it
+						// was meant to validate coll instead. TODO confirm.
+						if strings.Contains(code, "bidding") {
+							errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行爬虫" + cells[1].Value + ",存储表错误"
+							continue
+						}
+						o["coll"] = coll
+						ok := saveLuaAi(o) // persist the spider config
+						if ok == false {
+							errorinfo[cells[1].Value] = "第" + strconv.Itoa(k) + "行找不到作者,已经过滤"
+						} else {
+							o["status"] = 1
+							o["author"] = o["modifyuser"]
+							o["importuser"] = f.GetSession("username")
+							u.MgoEB.Save("import", o)
+							// Queue the spider for the omnipotent-crawler check
+							// (consumed by u.CommCodeCheck).
+							u.CommCodesCache <- map[string]string{
+								"code":    code,
+								"href":    href,
+								"site":    qu.ObjToString(o["name"]),
+								"channel": qu.ObjToString(o["channel"]),
+							}
+						}
+					}
+				}
+			}
+			f.ServeJson(errorinfo)
+		} else {
+			f.ServeJson(false)
+		}
+	}
+}
+
+// saveLuaAi assembles the full luaconfig document for an AI-imported spider
+// (base fields, wizard/expert params, model) and persists it via
+// spider.SaveSpider. On success it backfills a "site" record if the site is
+// unknown, or — for key sites — flags the spider as important. Returns
+// false when SaveSpider rejects the document.
+// NOTE(review): userid/email are declared but never assigned (always ""),
+// and claimLog is never populated, so the lua_logs_claim save below never
+// fires — presumably copied from ImportLua's saveLua; confirm intent.
+func saveLuaAi(o map[string]interface{}) bool {
+	// Base fields.
+	param := map[string]interface{}{
+		"recovertime":          int64(0),           // recycle time
+		"claimtime":            int64(0),           // claim time
+		"claimtype":            CLAIMTYPEUNCLAIMED, // claim state: unclaimed
+		"priority":             o["priority"],      // priority
+		"spidercompete":        true,               // set on spiders created after 2021-11-20 (new spider; Jianyu site hides the original page)
+		"spiderhistorymaxpage": 1,                  // max history page
+		"pendstate":            0,                  //
+		"grade":                0,                  // difficulty (mainly used by python spiders)
+		"spiderimportant":      false,              // whether this targets a key site
+		"urgency":              0,
+		"incrementevent":       qu.IntAll(o["incrementevent"]),
+		"platform":             o["platform"],
+		"weight":               o["weight"],
+		"infoformat":           o["infoformat"],
+		//"urgency":              o["urgency"],
+		"isflow":       o["isflow"],
+		"spidertype":   "history",
+		"spiderremark": o["spiderremark"],
+		"omnipotent":   false,
+	}
+	// NOTE(review): AutoTpl is shared package state mutated here — concurrent
+	// imports could interleave these writes; confirm single-threaded use.
+	AutoTpl["Base.SpiderName"] = o["name"]
+	AutoTpl["Base.SpiderCode"] = o["code"]
+	AutoTpl["Base.SpiderChannel"] = o["channel"]
+	AutoTpl["Base.SpiderTargetChannelUrl"] = o["channeladdr"]
+	modifyuser := o["modifyuser"].(string)
+	var userid, email string
+	claimLog := map[string]interface{}{}
+	common := []interface{}{
+		AutoTpl["Base.SpiderCode"],
+		AutoTpl["Base.SpiderName"],
+		AutoTpl["Base.SpiderChannel"],
+		AutoTpl["Base.SpiderDownDetailPage"],
+		AutoTpl["Base.SpiderStartPage"],
+		AutoTpl["Base.SpiderMaxPage"],
+		AutoTpl["Base.SpiderRunRate"],
+		//AutoTpl["Base.Spider2Collection"],
+		//"bidding", //爬虫导入新建默认为bidding
+		qu.ObjToString(o["coll"]),
+		AutoTpl["Base.SpiderPageEncoding"],
+		AutoTpl["Base.SpiderStoreMode"],
+		AutoTpl["Base.SpiderStoreToMsgEvent"],
+		AutoTpl["Base.SpiderTargetChannelUrl"],
+		AutoTpl["Base.SpiderLastDownloadTime"],
+		AutoTpl["Base.SpiderIsHistoricalMend"],
+		AutoTpl["Base.SpiderIsMustDownload"],
+	}
+	ptime := []interface{}{
+		AutoTpl["Step1.DateFormat"],
+		AutoTpl["Step1.Address"],
+		AutoTpl["Step1.ContentChooser"],
+	}
+	list := []interface{}{
+		AutoTpl["Step2.Listadd"],
+		AutoTpl["Step2.Listadds"],
+		AutoTpl["Step2.BlockChooser"],
+		AutoTpl["Step2.AddressChooser"],
+		AutoTpl["Step2.TitleChooser"],
+		AutoTpl["Step2.DateChooser"],
+		AutoTpl["Step2.DateFormat"],
+	}
+	content := []interface{}{
+		AutoTpl["Step3.ContentChooser"],
+		AutoTpl["Step3.ElementChooser"],
+	}
+	param["param_common"] = common
+	// Wizard-mode params.
+	param["param_time"] = ptime
+	param["param_list"] = list
+	param["param_content"] = content
+	param["type_time"] = 0
+	param["type_list"] = 0
+	param["type_content"] = 0
+	// Expert-mode (raw script) params.
+	param["str_time"] = ""
+	param["str_list"] = ""
+	param["str_content"] = ""
+	param["comeintime"] = time.Now().Unix()
+	param["code"] = o["code"]
+	param["site"] = o["name"]
+	param["href"] = o["channeladdr"]
+	param["channel"] = o["channel"]
+	param["createuser"] = modifyuser
+	param["createuserid"] = userid
+	param["createuseremail"] = email
+	param["modifyuser"] = modifyuser
+	param["modifyuserid"] = userid
+	param["modifytime"] = time.Now().Unix()
+	param["state"] = 0 // not finished
+	if qu.IntAll(o["event"]) > 0 {
+		param["event"] = qu.IntAll(o["event"])
+	}
+	// Copy the configured "bid" model fields (area/city/district, ...) from o.
+	s_model := "bid"
+	configModel := util.Config.Model[s_model]
+	model := map[string]interface{}{}
+	for k, _ := range configModel {
+		model[k] = qu.ObjToString(o[k])
+	}
+	param["model"] = model
+	param["next"] = email
+	incrementevent := qu.ObjToString(o["incrementevent"])
+	if movevent, ok := util.Config.Uploadevents[incrementevent].(string); ok && movevent != "" {
+		param["spidermovevent"] = movevent
+	}
+	// Map infoformat to a human-readable info-type label (default: 招标).
+	infoformat := qu.IntAll(o["infoformat"])
+	infotype := "招标"
+	if infoformat == 2 {
+		infotype = "拟建/审批"
+	} else if infoformat == 3 {
+		infotype = "产权"
+	} else if infoformat == 4 {
+		infotype = "舆情"
+	}
+	ok := spider.SaveSpider(o["code"].(string), param) // persist the spider
+	if ok {                                            // on success: ensure the spider's site exists, else create the site record
+		site, _ := u.MgoEB.FindOneByField("site", map[string]interface{}{"site": o["name"]}, map[string]interface{}{"important": 1})
+		if len(*site) == 0 {
+			qu.Debug("补充站点信息:", o["name"])
+			domain := u.DomainReg.FindString(qu.ObjToString(AutoTpl["Base.SpiderTargetChannelUrl"]))
+			if domain != "" {
+				domain = u.ReplaceReg.ReplaceAllString(domain, "")
+			}
+			siteInfo := map[string]interface{}{
+				"site":           o["name"],
+				"domain":         domain, //
+				"another_name":   "",
+				"area":           qu.ObjToString(model["area"]),
+				"city":           qu.ObjToString(model["city"]),
+				"district":       qu.ObjToString(model["district"]),
+				"site_type":      "",
+				"second_type":    "",
+				"industry":       "",
+				"p_site":         "",
+				"s_site":         "",
+				"remarktime":     time.Now().Unix(),
+				"event":          incrementevent,
+				"platform":       o["platform"],
+				"spider_status":  "0/1",
+				"updatetime":     time.Now().Unix(),
+				"delete":         false,
+				"comeintime":     time.Now().Unix(),
+				"important":      0,
+				"site_status":    1,
+				"lasttime":       int64(0),
+				"site_datanum":   0,
+				"period":         float32(0),
+				"infotype":       infotype,
+				"sponsor":        "",
+				"isneedregister": 0,
+				"isregistered":   0,
+				"special_type":   "",
+				"account":        "",
+				"password":       "",
+				"f_area":         "",
+				"f_city":         "",
+				"f_district":     "",
+				"site_subtype":   "",
+				"site_toptype":   "",
+				"type_plate":     "",
+			}
+			u.MgoEB.Save("site", siteInfo)
+		} else if qu.IntAll((*site)["important"]) == 1 { // key site: mark spider important and upsert its base info
+			u.MgoEB.Update("luaconfig", map[string]interface{}{"code": o["code"]}, map[string]interface{}{"$set": map[string]interface{}{"spiderimportant": true}}, false, false)
+			u.MgoEB.Update("site_code_baseinfo", map[string]interface{}{"spidercode": o["code"]}, map[string]interface{}{"$set": map[string]interface{}{
+				"site":       o["name"],
+				"channel":    o["channel"],
+				"spidercode": o["code"],
+				"platform":   o["platform"],
+				"modifyuser": "",
+				"state":      0,
+			}}, true, false)
+		}
+		// Write the claim log (NOTE(review): claimLog is always empty here,
+		// so this branch is currently dead).
+		if len(claimLog) > 0 {
+			u.MgoEB.Save("lua_logs_claim", claimLog)
+		}
+	}
+	return ok
+}
+
 func (f *Front) Importdata() {
 	auth := qu.IntAll(f.GetSession("auth"))
 	if auth == u.Role_Admin {

+ 3 - 2
src/front/site.go

@@ -124,6 +124,7 @@ func (s *Site) SaveSite() {
 			return
 		}
 	}
+	oldSite := s.GetString("oldsite")
 	othername := s.GetString("othername")               //别名
 	domain := s.GetString("domain")                     //域名
 	sponsor := s.GetString("sponsor")                   //主办方
@@ -147,7 +148,7 @@ func (s *Site) SaveSite() {
 	f_city := s.GetString("f_city")         //发布城市
 	f_district := s.GetString("f_district") //发布区县
 
-	tmpdomain, status, event, platform, infotype, _, remarktime := util.GetLuasInfoBySite(site, area, city, district)
+	tmpdomain, status, event, platform, infotype, _, remarktime := util.GetLuasInfoBySite(site, oldSite, area, city, district)
 	if domain == "" {
 		domain = tmpdomain
 	}
@@ -233,7 +234,7 @@ func (s *Site) ImportSite() {
 				site_toptype := r.Cells[7].Value
 				p_site := r.Cells[8].Value
 				s_site := r.Cells[9].Value
-				tmpdomain, status, event, platform, infotype, specialtype, remarktime := util.GetLuasInfoBySite(site, area, city, district)
+				tmpdomain, status, event, platform, infotype, specialtype, remarktime := util.GetLuasInfoBySite(site, site, area, city, district)
 				if domain == "" {
 					domain = tmpdomain
 				}

+ 2 - 0
src/main.go

@@ -100,6 +100,8 @@ func main() {
 	go timetask.TimeTask()
 	//爬虫质检
 	go luacheck.LuaCheckStart()
+	//万能爬虫校验
+	go u.CommCodeCheck()
 	//提供接口,接收其他数据
 	http.HandleFunc("/spider/infos", func(w http.ResponseWriter, req *http.Request) {
 		data := req.FormValue("data")

+ 3 - 2
src/spider/script.go

@@ -447,7 +447,8 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		text := S.ToString(-3)
 		old := S.ToString(-2)
 		repl := S.ToString(-1)
-		text = strings.Replace(text, old, repl, n)
+		reg := regexp.MustCompile(old)
+		text = reg.ReplaceAllString(text, repl)
 		S.Push(lua.LString(text))
 		return 1
 	}))
@@ -758,7 +759,7 @@ func (s *Script) LoadScript(site *string, downloadnode, script string, isfile ..
 		if err == nil {
 			headJsonStr = string(headByte)
 		}
-		code, respHead, respCookie := codegrpc.GetCodeByPath(path, stype, headJsonStr, cookie, proxy)
+		code, respHead, respCookie := codegrpc.GetCodeByPath(s.SCode, path, stype, headJsonStr, cookie, proxy)
 		qu.Debug("GetCodeByPath code====", code)
 		//qu.Debug("respHead====", respHead)
 		//qu.Debug("respCookie====", respCookie)

+ 5 - 5
src/spider/service.go

@@ -408,10 +408,7 @@ func UpdateSpiderByCodeState(code, state string, event int) (b bool, err error)
 		}
 	}()
 	if istate == 5 {
-		ok := UpdateOnlineCode(code, istate) //上架更新
-		if !ok {
-			return ok, errors.New("线上表更新失败")
-		}
+		go UpdateOnlineCode(code, istate) //上架更新
 	}
 	msgid := mu.UUID(8)
 	data := map[string]interface{}{}
@@ -437,19 +434,22 @@ func UpdateSpiderByCodeState(code, state string, event int) (b bool, err error)
 }
 
 func UpdateOnlineCode(code string, state int) bool {
+	qu.Debug("-----------更新线上爬虫", code)
 	upsert := false
 	query := map[string]interface{}{"code": code}
 	set := map[string]interface{}{}
 	if state == 6 { //下架
 		set = map[string]interface{}{"state": state}
 	} else if state == 5 { //上架
-		//time.Sleep(1 * time.Minute) //更新太多了,防止这个luaconfig_online更新动作在luaconfig之前
+		time.Sleep(30 * time.Second) //更新太多了,防止这个luaconfig_online更新动作在luaconfig之前
 		lua, _ := u.MgoEB.FindOne("luaconfig", map[string]interface{}{"code": code})
 		if len(*lua) > 0 {
+			(*lua)["state"] = state
 			set = *lua
 		} else {
 			return false
 		}
 	}
+	qu.Debug("-----------更新线上爬虫完毕", code)
 	return u.MgoEB.Update("luaconfig_online", query, map[string]interface{}{"$set": set}, upsert, false)
 }

+ 1 - 1
src/timetask/timetask.go

@@ -119,7 +119,7 @@ func UpdateSiteInfo() {
 	sites, _ := util.MgoEB.Find(sp.Config.SiteColl, map[string]interface{}{"delete": false}, ``, `{"site":1}`, false, -1, -1)
 	for _, s := range *sites {
 		site := qu.ObjToString(s["site"])
-		domain, status, event, platform, infotype, specialtype, _ := util.GetLuasInfoBySite(site, "", "", "")
+		domain, status, event, platform, infotype, specialtype, _ := util.GetLuasInfoBySite(site, site, "", "", "")
 		set := map[string]interface{}{
 			"$set": map[string]interface{}{
 				"platform":      platform,

+ 43 - 0
src/util/code.go

@@ -0,0 +1,43 @@
+package util
+
+import (
+	"fmt"
+	qu "qfw/util"
+	"regexp"
+	"strconv"
+)
+
+// CheckCode returns a spider code that is unique within the "luaconfig"
+// collection. If the given code (or a numbered variant "code_NN") already
+// exists, the next numbered variant is returned — e.g. "abc" -> "abc_01",
+// "abc_03" -> "abc_04" — otherwise the code is returned unchanged.
+func CheckCode(code string) string {
+	// Escape regex metacharacters so codes containing ".", "+", etc. can
+	// neither break the pattern nor match unrelated documents.
+	quoted := regexp.QuoteMeta(code)
+	query := map[string]interface{}{
+		"code": map[string]interface{}{
+			// Anchor the Mongo $regex: only this code and its _NN variants
+			// (an unanchored pattern also matched any code containing it).
+			"$regex": "^" + quoted,
+		},
+	}
+	fields := map[string]interface{}{"code": 1}
+	list, _ := MgoEB.Find("luaconfig", query, nil, fields, false, -1, -1)
+	// Compile once, outside the loop (was recompiled per document).
+	reg := regexp.MustCompile("^" + quoted + "_(\\d+)$")
+	maxNumber, width, exists := 0, 2, false
+	for _, l := range *list {
+		tmpCode := qu.ObjToString(l["code"])
+		if tmpCode == code {
+			exists = true
+			continue
+		}
+		if m := reg.FindStringSubmatch(tmpCode); len(m) == 2 {
+			exists = true
+			// Compare suffixes numerically: a lexicographic sort would rank
+			// "x_9" above "x_10" and hand out a duplicate code.
+			if n, err := strconv.Atoi(m[1]); err == nil && n >= maxNumber {
+				maxNumber = n
+				width = len(m[1]) // keep the existing zero-padding width
+			}
+		}
+	}
+	if exists {
+		return fmt.Sprintf("%s_%0*d", code, width, maxNumber+1)
+	}
+	return code
+}

+ 155 - 0
src/util/commcodecheck.go

@@ -0,0 +1,155 @@
+package util
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	qu "qfw/util"
+	sp "spiderutil"
+	"strings"
+	"sync"
+	"time"
+)
+
+var (
+	CommCodesCache = make(chan map[string]string, 1000)
+)
+
+const (
+	PlatformFlag = 6 //发布量大异常标记
+	UserName     = "comai"
+	UserId       = "67f49801ebd87fd0d9123161"
+	UserEmail    = "jiqiren@topnet.net.cn"
+)
+
+type (
+	//Result struct {
+	//	Code          string        `json:"code"`
+	//	Href          string        `json:"href"`
+	//	Omnipotent    bool          `json:"omnipotent"`
+	//	Flag          int           `json:"flag"`
+	//	DetailLen     int           `json:"detaillen"`
+	//	DetailBodyLen int           `json:"detailbodylen"`
+	//	CssMark       *SpiderConfig `json:"cssmark"`
+	//}
+
+	//爬虫配置信息
+	SpiderConfig struct {
+		//Href               string     `json:"href"`
+		//AttachJSCode       string     `json:"attachJs"` //无效
+		InitList           []*Actions `json:"initList"`
+		ListBodyCss        string     `json:"listBodyCss"` //用于判断是否翻页成功
+		ListItemCss        string     `json:"listItemCss"`
+		ListLinkCss        string     `json:"listLinkCss"`
+		ListPubtimeCss     string     `json:"listPublishTimeCss"`
+		ListNextPageCss    string     `json:"listNextPageCss"`
+		TitleCss           string     `json:"titleCss"`
+		PublishUnitCss     string     `json:"publishUnitCss"`
+		PublishTimeCss     string     `json:"publishTimeCss"`
+		ContentCss         string     `json:"contentCss"`
+		AttachCss          string     `json:"attachCss"`
+		ListJSCode         string     `json:"listJs"`
+		ContentJSCode      string     `json:"contentJs"`
+		ListTurnPageJSCode string     `json:"listTurnPageJs"`
+		MaxPages           int64      `json:"maxPages"`
+		FilterResource     string     `json:"filterResource"` //要过滤的资源
+		//延时
+		ListDelayTime     int64 `json:"listDelayTime"`
+		ListTurnDelayTime int64 `json:"listTurnDelayTime"`
+		ContentDelayTime  int64 `json:"contentDelayTime"`
+	}
+
+	Actions struct {
+		ActionJs  string `json:"actionJs"`
+		CheckJs   string `json:"checkJs"`
+		SleepTime int64  `json:"sleepTime"`
+	}
+)
+
+// CommCodeCheck is a long-running worker (started as a goroutine from main)
+// that drains CommCodesCache and runs the omnipotent-crawler check for each
+// newly imported spider, with at most 5 checks in flight at a time.
+func CommCodeCheck() {
+	wg := &sync.WaitGroup{}
+	ch := make(chan bool, 5) // concurrency limiter
+	for {
+		code := <-CommCodesCache // fetch next imported spider
+		ch <- true
+		wg.Add(1)
+		go func(tmp map[string]string) {
+			defer func() {
+				<-ch
+				wg.Done()
+			}()
+			href := tmp["href"]
+			r, err := OmnipotentCodeCheck(href)
+			// NOTE(review): r can be nil when the service replies JSON "null";
+			// the writes into r below would then panic. Reading omnipotent
+			// before the err check is safe (nil-map reads are fine) but would
+			// be clearer inside the err == nil branch.
+			omnipotent, _ := r["omnipotent"].(bool)
+			if err == nil {
+				// Archive the raw check result.
+				r["code"] = tmp["code"]
+				r["site"] = tmp["site"]
+				r["channel"] = tmp["channel"]
+				r["comeintime"] = time.Now().Unix()
+				MgoEB.Save("luaconfig_omnipotent_log", r)
+				set := map[string]interface{}{}
+				if omnipotent {
+					// Omnipotent spider: auto-claim, mark online, and assign
+					// the robot user as owner.
+					set["cssmark"] = r["cssmark"]
+					set["omnipotent"] = true
+					set["recovertime"] = time.Now().Unix()
+					set["claimtime"] = time.Now().Unix()
+					set["claimtype"] = 2 // claimed
+					set["state"] = 11    // online
+					// Default robot user.
+					set["createuser"] = UserName
+					set["createuserid"] = UserId
+					set["createuseremail"] = UserEmail
+					set["modifyuser"] = UserName
+					set["modifyuserid"] = UserId
+				} else if qu.IntAll(r["flag"]) == PlatformFlag {
+					// High publish volume: route to the golua platform.
+					set["platform"] = "golua平台"
+					set["incrementevent"] = 7200
+				}
+				if len(set) > 0 {
+					MgoEB.Update("luaconfig",
+						map[string]interface{}{
+							"code":      tmp["code"],
+							"claimtype": 0, // only touch still-unclaimed spiders
+						},
+						map[string]interface{}{
+							"$set": set,
+						},
+						false, false)
+					if omnipotent {
+						// Give the update a moment to land, then mirror the
+						// document into the online collection.
+						time.Sleep(5 * time.Second)
+						one, _ := MgoEB.FindOne("luaconfig", map[string]interface{}{"code": tmp["code"]})
+						if len(*one) > 0 {
+							MgoEB.SaveByOriID("luaconfig_online", *one)
+						}
+					}
+				}
+			} else {
+				qu.Debug("万能爬虫验证异常:", err)
+			}
+		}(code)
+	}
+	wg.Wait() // NOTE(review): unreachable — the for-loop above never exits.
+}
+
+// OmnipotentCodeCheck posts the channel url to the omnipotent-crawler check
+// service (sp.Config.OptCodeServer) as a form body and decodes its JSON
+// reply into a generic map. Returns a nil/partial map with a non-nil error
+// on any transport or decode failure.
+func OmnipotentCodeCheck(url string) (result map[string]interface{}, err error) {
+	client := &http.Client{
+		Timeout: 2 * time.Minute, // the check renders pages; allow a long wait
+	}
+	// NOTE(review): the form value is not percent-encoded — a url containing
+	// "&" or "=" would corrupt the body; consider net/url.Values.Encode().
+	req, err := http.NewRequest("POST", sp.Config.OptCodeServer, strings.NewReader(fmt.Sprintf("%s=%s", "url", url)))
+	if err != nil {
+		// Must bail out BEFORE touching req: on error req is nil and
+		// req.Header.Set would panic (this check originally came after it).
+		return
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	resp, err := client.Do(req)
+	if err != nil {
+		return
+	}
+	defer resp.Body.Close()
+	respBody, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return
+	}
+	err = json.Unmarshal(respBody, &result)
+	return
+}

+ 7 - 3
src/util/util.go

@@ -16,6 +16,7 @@ import (
 )
 
 const Role_Admin, Role_Examine, Role_Dev = 3, 2, 1 //管理员,审核员,开发员
+
 var (
 	//MgoE            *mgo.MongodbSim //编辑器87
 	MgoEB                *mgo.MongodbSim //编辑器163
@@ -355,7 +356,7 @@ func GetModifyUsers() []map[string]interface{} {
 	return *user
 }
 
-func GetLuasInfoBySite(site, area, city, district string) (domain, status, event, platform, infotype, specialtype string, remarktime int64) {
+func GetLuasInfoBySite(site, oldSite, area, city, district string) (domain, status, event, platform, infotype, specialtype string, remarktime int64) {
 	shelveUp := 0
 	eventMap, platformMap := map[int]interface{}{}, map[string]interface{}{}
 	infoformatMap := map[int]bool{}
@@ -367,11 +368,14 @@ func GetLuasInfoBySite(site, area, city, district string) (domain, status, event
 	domainArr := []string{}
 	remarktime = time.Now().Unix()
 	//luas, _ := MgoE.Find("luaconfig", `{"param_common.1":"`+site+`"}`, ``, `{"model":1,"event":1,"state":1,"platform":1,"param_common":1,"comeintime":1}`, false, -1, -1)
-	luas, _ := MgoEB.Find("luaconfig", `{"site":"`+site+`"}`, ``, `{"projecthref":1,"model":1,"event":1,"state":1,"platform":1,"param_common":1,"comeintime":1,"infoformat":1}`, false, -1, -1)
+	luas, _ := MgoEB.Find("luaconfig", `{"site":"`+oldSite+`"}`, ``, `{"projecthref":1,"model":1,"event":1,"state":1,"platform":1,"param_common":1,"comeintime":1,"infoformat":1}`, false, -1, -1)
 	arr := [][]map[string]interface{}{}
 	for _, l := range *luas {
 		update := []map[string]interface{}{}
-		set := map[string]interface{}{}
+		set := map[string]interface{}{
+			"site":           site,
+			"param_common.1": site,
+		}
 		if b, ok := l["projecthref"].(bool); ok && b { //爬虫采集的数据是流程性信息
 			specialtype = "含流程数据"
 		}

+ 33 - 0
src/web/templates/import.html

@@ -9,6 +9,11 @@
 				</button>
 				<iframe srcdoc="<form id='uploadform' method='post' enctype='multipart/form-data' action='/center/importlua'><input type='file' name='xlsx' /></form>" height=0 scrolling=no class="hide"  id="fileframe">
 				</iframe>
+				 <button class="btn btn-success" onclick='importAi()'>
+					 AI识别导入
+				 </button>
+				 <iframe srcdoc="<form id='uploadform' method='post' enctype='multipart/form-data' action='/center/importai'><input type='file' name='xlsx' /></form>" height=0 scrolling=no class="hide"  id="aifileframe">
+				 </iframe>
 			 </small>
 		   </h1>
 		   <ol class="breadcrumb">
@@ -103,6 +108,34 @@
 		  }
 		})
 	}
+	// importAi uploads the AI-import xlsx through the hidden #aifileframe
+	// iframe form (POST /center/importai), then polls the iframe until the
+	// server response replaces the form, showing any per-row errors.
+	function importAi(){
+		var f=$("#aifileframe").contents().find("input");
+		f.get(0).click();
+		f.change(function(){
+			var val=$(this).val()?$(this).val():"";
+			if(val.indexOf(".xlsx")<0){
+				showMsg("文件格式非法", function() {});
+			}else{
+				$(this).parent().submit();
+				common.maskShow("正在导入数据");
+				// NOTE(review): window.frames[1] assumes this iframe is the
+				// second frame on the page (after #fileframe) — confirm the
+				// ordering stays stable if frames are added/removed.
+				var ret=setInterval(function(){
+					var f=$(window.frames[1].document).find("form");
+					if(f.length==0){
+						common.maskHide();
+						var b=$(window.frames[1].document).find("body").html();
+						// Re-create the upload form so the button works again.
+						$(window.frames[1].document).find("body").append("<form id='uploadform' method='post' enctype='multipart/form-data' action='/center/importai'><input type='file' name='xlsx' /></form>");
+						var r=window.confirm("导入完毕,是否查看错误信息");
+						if(r){
+							$("#errmsg").removeClass("hide").append(b);
+						}
+						clearInterval(ret);
+						importSpider.ajax.reload();
+					}
+				},500)
+
+			}
+		})
+	}
 </script>
 </div>
 {{include "bottom.html"}}

+ 5 - 13
src/web/templates/sitelist.html

@@ -414,6 +414,7 @@ var platTypeMap = {{.T.plattype}};
 var areas = {{.T.areas}};
 var provinces = {{.T.provinces}};
 var citys = {{.T.citys}};
+var oldSite = ""
 $(function(){
     // siteTypesMap = {{.T.sitetypes}};
     // $("#edit-site_toptype").append("<option value=''>--请选择网站一级类型--</option>");
@@ -627,19 +628,6 @@ $(function(){
           { "data": "f_area"},
           { "data": "f_city"},
           { "data": "f_district"},
-          // { "data": "p_site"},
-          // { "data": "s_site"},
-          // { "data": "spider_status",width:"28px",render:function (val,a,row) {
-          //       return  "<a href='/center/site/getluas/"+row.site+"' style='color: #333 !important;' target='_blank'>"+val+"</a>"
-          //       //return  '<a onclick="goToCenter(\''+row.site+'\')" style="color: #333 !important;" target="_blank">'+val+'</a>'
-          //     }},
-          // { "data": "event",render:function (val,a,row){
-          //        var tmpval =val
-          //        if(val.length >9){
-          //            val = val.substring(0,9)+"..."
-          //        }
-          //        return "<span title='"+tmpval+"'>"+val+"</span>"
-          //     }},
           { "data": "infotype",render:function (val,a,row) {
                 if(val){
                     return val;
@@ -943,6 +931,9 @@ function editSiteSave(){
         return
     }
     formdataMap["id"] = $("#saveSiteId").attr("saveid");
+    if (oldSite != ""){
+        formdataMap["oldsite"] = oldSite;
+    }
     $.ajax({
         url:"/center/site/savesite",
         type:"post",
@@ -964,6 +955,7 @@ function editSiteSave(){
 //编辑信息
 function siteEdit(rowjson){
     var row = JSON.parse(rowjson);
+    oldSite = row.site;
     $("#edit-site").val(row.site);
     $("#edit-domain").val(row.domain);
     $("#edit-othername").val(row.another_name);