|
@@ -5,6 +5,7 @@ import (
|
|
|
"fmt"
|
|
|
"log"
|
|
|
qu "qfw/util"
|
|
|
+ "regexp"
|
|
|
"sort"
|
|
|
"strings"
|
|
|
"sync"
|
|
@@ -15,28 +16,31 @@ import (
|
|
|
"go.mongodb.org/mongo-driver/bson/primitive"
|
|
|
)
|
|
|
|
|
|
-//es、mgo非全部字段
|
|
|
-var FieldListMap = map[string]map[string]bool{
|
|
|
- "partners": map[string]bool{"stock_type": true, "stock_name": true, "stock_capital": false, "stock_realcapital": false, "identify_type": true, "identify_no": true},
|
|
|
- "employees": map[string]bool{"employee_name": false, "position": false},
|
|
|
-}
|
|
|
-
|
|
|
-//全部字段
|
|
|
-var AllFieldListMap = []string{"punishes", "operations", "illegals"}
|
|
|
+var (
|
|
|
+ // Cleanup patterns
|
|
|
+ Han = regexp.MustCompile("[\\p{Han}]") // matches a single Han (Chinese) character
|
|
|
|
|
|
-//地区处理
|
|
|
-var AreaFiled = []string{"credit_no", "company_code", "area_code"}
|
|
|
+ // es / mgo: list fields of which only some sub-fields are used (not all fields). NOTE(review): the meaning of the true/false flag is not visible here — confirm against the loop over FieldListMap
|
|
|
+ FieldListMap = map[string]map[string]bool{
|
|
|
+ "partners": map[string]bool{"stock_type": true, "stock_name": true, "stock_capital": false, "stock_realcapital": false, "identify_type": true, "identify_no": true},
|
|
|
+ "employees": map[string]bool{"employee_name": false, "position": false},
|
|
|
+ }
|
|
|
+ // List fields of which all sub-fields are used
|
|
|
+ AllFieldListMap = []string{"punishes", "operations", "illegals"}
|
|
|
+ // Area (region) handling
|
|
|
+ AreaFiled = []string{"credit_no", "company_code", "area_code"}
|
|
|
+ // Annual report information: two groups of field names; group 0 is handled as string info by the i == 0 branch of the callers. NOTE(review): handling of group 1 is not visible here — confirm
|
|
|
+ AnnualReportsArr = [][]string{
|
|
|
+ []string{"report_year", "company_phone", "zip_code", "company_email", "employee_no", "operator_name"},
|
|
|
+ []string{"total_assets", "total_equity", "total_sales", "total_profit", "main_business_income", "profit_amount", "total_tax", "total_liability"},
|
|
|
+ }
|
|
|
+)
|
|
|
|
|
|
// var AllFieldListMap = map[string]string{
|
|
|
// "punishes": "punish_size",
|
|
|
// "operations": "operation_size",
|
|
|
// "illegals": "illegal_size",
|
|
|
// }
|
|
|
-var AnnualReportsArr = [][]string{
|
|
|
- []string{"report_year", "company_phone", "zip_code", "company_email", "employee_no", "operator_name"},
|
|
|
- []string{"total_assets", "total_equity", "total_sales", "total_profit", "main_business_income", "profit_amount", "total_tax", "total_liability"},
|
|
|
-}
|
|
|
-
|
|
|
//不生索引字段
|
|
|
//var NotEsField = []string{"cancel_reason", "revoke_reason", "cancels"} //cancel_size
|
|
|
|
|
@@ -232,7 +236,7 @@ func QyxyStandard() bool {
|
|
|
//list数据
|
|
|
stockName := []string{}
|
|
|
for field, fieldMap := range FieldListMap {
|
|
|
- if list, ok := tmp[field].(primitive.A); ok && len(list) > 0 {
|
|
|
+ if list, ok := tmp[field].([]interface{}); ok && len(list) > 0 {
|
|
|
if len(list) > 500 {
|
|
|
list = list[:500]
|
|
|
}
|
|
@@ -272,7 +276,7 @@ func QyxyStandard() bool {
|
|
|
esMap["stock_name"] = strings.Join(stockName, ",")
|
|
|
}
|
|
|
for _, field := range AllFieldListMap {
|
|
|
- if list, ok := tmp[field].(primitive.A); ok && len(list) > 0 {
|
|
|
+ if list, ok := tmp[field].([]interface{}); ok && len(list) > 0 {
|
|
|
tmpArrMgo := []map[string]interface{}{}
|
|
|
for _, l := range list {
|
|
|
tmpMapMgo := map[string]interface{}{}
|
|
@@ -296,10 +300,10 @@ func QyxyStandard() bool {
|
|
|
sortArr := []string{} //存年份
|
|
|
sortMap := map[string]map[string]interface{}{} //key:年份;val:每一个年报中的company_phone,company_email,stock_name
|
|
|
tmpArrMgo := []map[string]interface{}{}
|
|
|
- if annual_reports, ok := tmp["annual_reports"].(primitive.A); ok && len(annual_reports) > 0 {
|
|
|
+ if annual_reports, ok := tmp["annual_reports"].([]interface{}); ok && len(annual_reports) > 0 {
|
|
|
for _, annual_report := range annual_reports {
|
|
|
- tmpMapMgo := map[string]interface{}{}
|
|
|
- tmpMap := map[string]interface{}{}
|
|
|
+ tmpMapMgo := map[string]interface{}{} //记录每个年报信息标准化到mgo的数据
|
|
|
+ tmpMap := map[string]interface{}{} //只记录每个年报信息的company_email和company_phone
|
|
|
report_year := ""
|
|
|
m := annual_report.(map[string]interface{})
|
|
|
for i, tmpArr := range AnnualReportsArr {
|
|
@@ -309,13 +313,15 @@ func QyxyStandard() bool {
|
|
|
if f == "report_year" {
|
|
|
report_year = textstr
|
|
|
sortArr = append(sortArr, textstr)
|
|
|
- } else if f == "company_phone" && len(textstr) >= 7 {
|
|
|
+ } else if f == "company_phone" && !Han.MatchString(textstr) && len(textstr) >= 7 {
|
|
|
tmpMap[f] = textstr
|
|
|
- } else if f == "company_email" {
|
|
|
+ tmpMapMgo[f] = textstr
|
|
|
+ } else if f == "company_email" && !Han.MatchString(textstr) && len(textstr) >= 4 {
|
|
|
tmpMap[f] = textstr
|
|
|
+ tmpMapMgo[f] = textstr
|
|
|
}
|
|
|
if i == 0 { //字符串信息
|
|
|
- if f == "company_phone" && len(textstr) < 7 {
|
|
|
+ if f == "company_phone" || f == "company_email" {
|
|
|
continue
|
|
|
}
|
|
|
tmpMapMgo[f] = textstr
|
|
@@ -328,7 +334,7 @@ func QyxyStandard() bool {
|
|
|
}
|
|
|
}
|
|
|
// stock_nameArr := []string{}
|
|
|
- // if i_partners, ok := m["report_partners"].(primitive.A); ok && len(i_partners) > 0 { //股东信息
|
|
|
+ // if i_partners, ok := m["report_partners"].([]interface{}); ok && len(i_partners) > 0 { //股东信息
|
|
|
// for _, par := range i_partners {
|
|
|
// m := par.(map[string]interface{})
|
|
|
// if stock_name, ok := m["stock_name"].(string); ok && stock_name != "" {
|
|
@@ -570,7 +576,7 @@ func HistoryQyxyStandard() bool {
|
|
|
//list数据
|
|
|
stockName := []string{}
|
|
|
for field, fieldMap := range FieldListMap {
|
|
|
- if list, ok := tmp[field].(primitive.A); ok && len(list) > 0 {
|
|
|
+ if list, ok := tmp[field].([]interface{}); ok && len(list) > 0 {
|
|
|
if len(list) > 500 {
|
|
|
list = list[:500]
|
|
|
}
|
|
@@ -610,7 +616,7 @@ func HistoryQyxyStandard() bool {
|
|
|
esMap["stock_name"] = strings.Join(stockName, ",")
|
|
|
}
|
|
|
for _, field := range AllFieldListMap {
|
|
|
- if list, ok := tmp[field].(primitive.A); ok && len(list) > 0 {
|
|
|
+ if list, ok := tmp[field].([]interface{}); ok && len(list) > 0 {
|
|
|
tmpArrMgo := []map[string]interface{}{}
|
|
|
for _, l := range list {
|
|
|
tmpMapMgo := map[string]interface{}{}
|
|
@@ -634,7 +640,7 @@ func HistoryQyxyStandard() bool {
|
|
|
sortArr := []string{} //存年份
|
|
|
sortMap := map[string]map[string]interface{}{} //key:年份;val:每一个年报中的company_phone,company_email,stock_name
|
|
|
tmpArrMgo := []map[string]interface{}{}
|
|
|
- if annual_reports, ok := tmp["annual_reports"].(primitive.A); ok && len(annual_reports) > 0 {
|
|
|
+ if annual_reports, ok := tmp["annual_reports"].([]interface{}); ok && len(annual_reports) > 0 {
|
|
|
for _, annual_report := range annual_reports {
|
|
|
tmpMapMgo := map[string]interface{}{}
|
|
|
tmpMap := map[string]interface{}{}
|
|
@@ -647,13 +653,15 @@ func HistoryQyxyStandard() bool {
|
|
|
if f == "report_year" {
|
|
|
report_year = textstr
|
|
|
sortArr = append(sortArr, textstr)
|
|
|
- } else if f == "company_phone" && len(textstr) >= 7 {
|
|
|
+ } else if f == "company_phone" && !Han.MatchString(textstr) && len(textstr) >= 7 {
|
|
|
tmpMap[f] = textstr
|
|
|
- } else if f == "company_email" {
|
|
|
+ tmpMapMgo[f] = textstr
|
|
|
+ } else if f == "company_email" && !Han.MatchString(textstr) && len(textstr) >= 4 {
|
|
|
tmpMap[f] = textstr
|
|
|
+ tmpMapMgo[f] = textstr
|
|
|
}
|
|
|
if i == 0 { //字符串信息
|
|
|
- if f == "company_phone" && len(textstr) < 7 {
|
|
|
+ if f == "company_phone" || f == "company_email" {
|
|
|
continue
|
|
|
}
|
|
|
tmpMapMgo[f] = textstr
|
|
@@ -666,7 +674,7 @@ func HistoryQyxyStandard() bool {
|
|
|
}
|
|
|
}
|
|
|
// stock_nameArr := []string{}
|
|
|
- // if i_partners, ok := m["report_partners"].(primitive.A); ok && len(i_partners) > 0 { //股东信息
|
|
|
+ // if i_partners, ok := m["report_partners"].([]interface{}); ok && len(i_partners) > 0 { //股东信息
|
|
|
// for _, par := range i_partners {
|
|
|
// m := par.(map[string]interface{})
|
|
|
// if stock_name, ok := m["stock_name"].(string); ok && stock_name != "" {
|