// service package spider import ( "encoding/json" "errors" "fmt" mu "mfw/util" "os" qu "qfw/util" mongodb "qfw/util/mongodb" util "spiderutil" "strings" "time" "gopkg.in/mgo.v2/bson" ) //获取脚本文件 func GetScript(code string, str ...interface{}) (script, script_list, script_content string) { defer mu.Catch() //script := "" luaconfig := *mongodb.FindOne("luaconfig", `{"code":"`+code+`"}`) //qu.Debug(code, "lua---", luaconfig) if luaconfig["listcheck"] == nil { luaconfig["listcheck"] = "" } if luaconfig["contentcheck"] == nil { luaconfig["contentcheck"] = "" } if luaconfig != nil && len(luaconfig) > 0 { common := luaconfig["param_common"].([]interface{}) if len(str) > 0 { if len(common) == 15 { common = append(common, str[0], str[1], str[2]) } else { common = append(common, false, false, str[0], str[1], str[2]) } } else { if len(common) == 15 { common = append(common, "", "", "") } else { common = append(common, false, false, "", "", "") } } for k, v := range common { if k == 4 || k == 5 || k == 6 || k == 9 || k == 10 { common[k] = qu.IntAll(v) } } script, _ = GetTmpModel(map[string][]interface{}{"common": common}) script_time := "" if luaconfig["type_time"] == 0 { time := luaconfig["param_time"].([]interface{}) script_time, _ = GetTmpModel(map[string][]interface{}{ "time": time, }) } else { script_time = luaconfig["str_time"].(string) } //script_list := "" //列表页 if luaconfig["type_list"] == 0 { list := luaconfig["param_list"].([]interface{}) addrs := strings.Split(list[1].(string), "\n") if len(addrs) > 0 { for k, v := range addrs { addrs[k] = "'" + v + "'" } list[1] = strings.Join(addrs, ",") } else { list[1] = "" } script_list, _ = GetTmpModel(map[string][]interface{}{ "list": list, "listcheck": []interface{}{luaconfig["listcheck"]}, }) } else { script_list = luaconfig["str_list"].(string) } //script_content := "" //三级页 if luaconfig["type_content"] == 0 { content := luaconfig["param_content"].([]interface{}) script_content, _ = GetTmpModel(map[string][]interface{}{ "content": content, "contentcheck": []interface{}{luaconfig["contentcheck"]}, }) } else { script_content = luaconfig["str_content"].(string) } script += fmt.Sprintf(util.Tmp_Other, luaconfig["spidertype"], luaconfig["spiderhistorymaxpage"], luaconfig["spidermovevent"]) script += ` ` + script_time + ` ` + script_list + ` ` + script_content script = ReplaceModel(script, common, luaconfig["model"].(map[string]interface{})) } return } //保存更新脚本 func SaveSpider(code string, param map[string]interface{}) bool { return mongodb.Update("luaconfig", bson.M{"code": code}, map[string]interface{}{"$set": param}, true, true) } /*获取最后发布时间 comm 通用参数 param 向导参数 proficient 专家脚本 guideType 向导类型 */ func GetLastPublishTime(comm, param []interface{}, proficient, downloadnode string, guideType int, scripts ...int) (timestr interface{}, err interface{}) { defer mu.Catch() var script string if guideType == 0 { script, err = GetTmpModel(map[string][]interface{}{ "common": comm, "time": param, }) } else { script, err = GetTmpModel(map[string][]interface{}{ "common": comm, }) script += proficient } if len(scripts) > 0 { return "", errors.New(script).Error() } if err != nil { return "", err } sp := CreateSpider(downloadnode, script) defer sp.L.Close() timestr, err = sp.GetLastPublishTimeTest() return } /*获取列表信息 comm 通用参数 param 向导参数 model 补充模型 modeltype 模型类型 proficient 专家脚本 guideType 向导类型 */ func GetPageList(comm, param []interface{}, model map[string]interface{}, listcheck interface{}, proficient, downloadnode string, guideType int, scripts ...int) (list []interface{}, err interface{}) { defer mu.Catch() var script string if guideType == 0 { script, err = GetTmpModel(map[string][]interface{}{ "common": comm, "list": param, "listcheck": []interface{}{listcheck}, }) script = ReplaceModel(script, comm, model) } else { script, err = GetTmpModel(map[string][]interface{}{ "common": comm, }) script += proficient } if len(scripts) > 0 { return nil, errors.New(script).Error() } if err != nil { return nil, err } sp := CreateSpider(downloadnode, script) sp.SpiderMaxPage = 1 defer sp.L.Close() list, err = sp.DownListPageItemTest() return } /*获取三级页信息 comm 通用参数 param 向导参数 proficient 专家脚本 guideType 向导类型 */ func GetContentInfo(comm, param []interface{}, data map[string]interface{}, contentcheck interface{}, proficient, downloadnode string, guideType int, scripts ...int) (rep map[string]interface{}, err interface{}) { defer mu.Catch() var script string if guideType == 0 { script, err = GetTmpModel(map[string][]interface{}{ "common": comm, "content": param, "contentcheck": []interface{}{contentcheck}, }) } else { script, err = GetTmpModel(map[string][]interface{}{ "common": comm, }) script += proficient } if len(scripts) > 0 { return nil, errors.New(script).Error() } if err != nil { return nil, err } sp := CreateSpider(downloadnode, script) sp.SpiderMaxPage = 1 defer sp.L.Close() param2 := map[string]string{} for k, v := range data { param2[k] = fmt.Sprint(v) } rep, err = sp.DownloadDetailPageTest(param2, data) return } //补充模型 func ReplaceModel(script string, comm []interface{}, model map[string]interface{}) string { //补充通用信息 commstr := `item["spidercode"]="` + comm[0].(string) + `";` commstr += `item["site"]="` + comm[1].(string) + `";` commstr += `item["channel"]="` + comm[2].(string) + `";` script = strings.Replace(script, "--Common--", commstr, -1) //补充模型信息 modelstr := "" for k, v := range model { modelstr += `item["` + k + `"]="` + v.(string) + `";` } script = strings.Replace(script, "--Model--", modelstr, -1) return script } //创建爬虫 func CreateSpider(downloadnode, script string, isfile ...string) *Spider { defer mu.Catch() sp := &Spider{} sp.LoadScript(downloadnode, script, isfile...) sp.Code = sp.GetVar("spiderCode") sp.SCode = sp.Code sp.Name = sp.GetVar("spiderName") sp.Channel = sp.GetVar("spiderChannel") sp.DownDetail = sp.GetBoolVar("spiderDownDetailPage") sp.Collection = sp.GetVar("spider2Collection") sp.SpiderStartPage = int64(sp.GetIntVar("spiderStartPage")) sp.SpiderMaxPage = int64(sp.GetIntVar("spiderMaxPage")) sp.SpiderRunRate = int64(sp.GetIntVar("spiderRunRate")) sp.StoreToMsgEvent = sp.GetIntVar("spiderStoreToMsgEvent") sp.StoreMode = sp.GetIntVar("spiderStoreMode") sp.CoverAttr = sp.GetVar("spiderCoverAttr") spiderSleepBase := sp.GetIntVar("spiderSleepBase") if spiderSleepBase == -1 { sp.SleepBase = 1000 } else { sp.SleepBase = spiderSleepBase } spiderSleepRand := sp.GetIntVar("spiderSleepRand") if spiderSleepRand == -1 { sp.SleepRand = 1000 } else { sp.SleepRand = spiderSleepRand } spiderTimeout := sp.GetIntVar("spiderTimeout") if spiderTimeout == -1 { sp.Timeout = 60 } else { sp.Timeout = int64(spiderTimeout) } sp.TargetChannelUrl = sp.GetVar("spiderTargetChannelUrl") sp.SpiderIsHistoricalMend = sp.GetBoolVar("spiderIsHistoricalMend") sp.SpiderIsMustDownload = sp.GetBoolVar("spiderIsMustDownload") //qu.Debug(sp.SpiderIsHistoricalMend, sp.SpiderIsMustDownload) return sp } //生成爬虫脚本 func GetTmpModel(param map[string][]interface{}) (script string, err interface{}) { qu.Try(func() { if param != nil && param["common"] != nil { if len(param["common"]) < 12 { err = "公共参数配置不全" } else { script = fmt.Sprintf(util.Tmp_common, param["common"]...) } } if param != nil && param["time"] != nil { if len(param["time"]) < 3 { err = "方法:time-参数配置不全" } else { script += fmt.Sprintf(util.Tmp_pubtime, param["time"]...) } } if param != nil && param["list"] != nil { if len(param["list"]) < 7 { err = "方法:list-参数配置不全" } else { list := []interface{}{param["listcheck"][0]} list = append(list, param["list"]...) script += fmt.Sprintf(util.Tmp_pagelist, list...) script = strings.Replace(script, "#pageno#", `"..tostring(pageno).."`, -1) } } if param != nil && param["content"] != nil { if len(param["content"]) < 2 { err = "方法:content-参数配置不全" } else { content := []interface{}{param["contentcheck"][0]} content = append(content, param["content"]...) script += fmt.Sprintf(util.Tmp_content, content...) } } }, func(e interface{}) { err = e }) return script, err } //生成文件 func CreateFile(code, script string) (string, error) { filepath := "res/" + time.Now().Format("2006/01/02") err := os.MkdirAll(filepath, 0777) f, err := os.Create(filepath + "/spider_" + code + ".lua") defer f.Close() f.WriteString(script) return filepath, err } //上传脚本 func UpdateSpiderByCodeState(code, state string, event int) (bool, error) { msgid := mu.UUID(8) data := map[string]interface{}{} data["code"] = code data["state"] = state rep := map[string]interface{}{} var bs []byte var err error if util.Config.Uploadevents[fmt.Sprint(event)] == "bid" { //? bs, err = MsclientBid.Call("", msgid, event, mu.SENDTO_TYPE_ALL_RECIVER, data, 60) } else { bs, err = Msclient.Call("", msgid, event, mu.SENDTO_TYPE_ALL_RECIVER, data, 60) } if err != nil { return false, err } else { json.Unmarshal(bs, &rep) b, _ := rep["b"].(bool) if !b { err = errors.New(qu.ObjToString(rep["err"])) } return b, err } }