main.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. package main
  2. import (
  3. "context"
  4. "fmt"
  5. "github.com/importcjj/sensitive"
  6. "go.mongodb.org/mongo-driver/bson/primitive"
  7. "google.golang.org/grpc"
  8. "gopkg.in/olivere/elastic.v1"
  9. "gopkg.in/yaml.v2"
  10. "io/ioutil"
  11. "log"
  12. "math/big"
  13. "net"
  14. "net/http"
  15. "regexp"
  16. "sensitiveWords.udp/proto_grpc"
  17. "sensitiveWords.udp/util"
  18. "strconv"
  19. "strings"
  20. )
  21. const (
  22. YAMLFILE = "./server.yaml"
  23. )
  24. var reg_alias = regexp.MustCompile("(税务局|工商行政管理局|文化广播电视新闻出版局|外国专家局|" +
  25. "中医药管理局|市场监督管理局|广播电视局|医疗保障局|机关事务管理局|粮食和物资储备局|" +
  26. "监狱管理局|畜牧兽医局|食品药品监督管理局|城市管理行政执法局|城市管理局|国家保密局|密码管理局|" +
  27. "地方金融监督管理局|住房保障和房屋管理局|质量技术监督局|人力资源与社会保障局|公路管理局|国土资源局|" +
  28. "卫生和计划生育局|民事政务局|公众安全局|交通管理局|人力资源和社会保障局|劳动和社会保障局|" +
  29. "住房和城乡建设局|就业服务局|文物管理局|环境保护局|粮食和物资储备局|教育体育局|" +
  30. "体育局|教育局|招商局|农业局|农机局|水务局|林业局|财政局|审计局|统计局|商务局)$")
  31. var reglen *regexp.Regexp = regexp.MustCompile("^(.{1,5}|.{40,})$")
  32. var strReg *regexp.Regexp = regexp.MustCompile("^(.{0,3}工程队|.{0,3}总公司|_+|.{0,2}设备安装公司|.{0,2}装[饰修潢]公司|.{0,2}开发公司|.{0,4}有限公司|.{0,4}有限责任公司|.{0,4}设计院|建筑设计研?究?院|省文物考古研究所|经济开发区|省.*|镇人民政府|.{0,2}服务公司|" +
  33. ".{0,2}工程质量监督站|.{0,3}经[营销]部|.{0,3}事务所|.{0,4}工程公司|.{0,4}责任公司|.*勘测|.{0,4}研究院|.*能源建|.{0,2}安装工程|.*[市省]{1}|.{0,4}中心|.*区.?|" +
  34. ".{0,3}税务局|.{0,3}财政局|.{0,3}商行|.{0,2}公安处|.{0,2}测绘院|.{0,3}开发|.{0,2}建设局|.{0,2}经销部|.{0,3}委员会|.{0,2}分公司|.{0,2}管理站|.{0,2}事务管理局|" +
  35. ".*资料|.{0,2}办公用品.{1,2}|.*唯亭|.*设备|.+安装|.{0,2}技术服务|市.+[台院社局司]|城?区.+[府局室院]|县.+[院台局]|.{0,2}发展公司|经济技术开发|" +
  36. "发展和改革局|贵州有色地质|铝塑门窗加工|生产力促进中心|特殊普通合伙|工业集团公司|人民调解协会|人民政府办公厅|机电设备公司|房地产开发有限公司|.{0,4}商店|中等专业学校|" +
  37. "农村信用联社|.{0,4}经营部|.{0,4}销售部|驾驶员培训学校|.{2}县.{2}镇|保安服务总公司|住房和城乡建设局|地产评估事务所|生产资料门市部|×+|.{0,3}[0-9]{15}|.*[0-9]+|.*路|.*无字号名称.*|.*车|.*[,,]{1}.*|.*个体工商户|.*运输户)$")
  38. //非中文开头...
  39. var unstart_strReg *regexp.Regexp = regexp.MustCompile("^([\u4e00-\u9fa5])")
  40. //开头
  41. var start_strReg *regexp.Regexp = regexp.MustCompile("^([a-zA-Z]{1,2}[\u4e00-\u9fa5]{6,}|省|市|县|区|业绩|资格|中标|项目|预算单位)")
  42. //结尾
  43. var end_strReg *regexp.Regexp = regexp.MustCompile("(\\.|\\.\\.|餐馆|店|腻子|肉庄|画社|美发屋|发廊|网吧|网咖|零售点|新街|包子铺|奶茶铺|(株)|先生|女士|小姐|" +
  44. "资格|业绩|中标|项目|预算单位|摊位号|号|厅|室|部|点|馆|场|厂|床|所|处|站|行|中心|合作社|ATMS|" +
  45. "吧|楼|摊|摊位|廊|茶社|坊|圃|汤锅|园|民宿|美容院|房|排挡|府|庄|栈|队|批发|苑|养殖户|棋牌|农家乐|货运|" +
  46. "城|社|基地|会|服务|娱乐|种植|百货|汽修|农家菜|亭|小吃|快餐|粮库|卫生院|书画院|面|门窗|鸡排|屋|橱|堂|肉铺|服务|服饰|/*)$")
  47. //包含
  48. var con_strReg *regexp.Regexp = regexp.MustCompile("(\\?|?|%|代码标识|删除|错误|吊销|注销|发起人|待清理|&#|护照号|身份证号|" +
  49. "法人|&nbsp|国家拨入|借款|积累资金|单位自有|认股人|--|、|&|`|美元|[\u4e00-\u9fa5]{2,6}·[\u4e00-\u9fa5]{2,6})|" +
  50. "[a-zA-Z]{5,}")
  51. var uncon_strReg *regexp.Regexp = regexp.MustCompile("(园|政府|集团|公司|有限|合伙|企|院|学|局|处)")
  52. var YamlConfig YAMLConfig
  53. var MixDataMgo *util.MongodbSim
  54. var Filter *sensitive.Filter
  55. var es_type, es_index string
  56. var Client_Es *elastic.Client
  57. var data_mgo *MongodbSim
  58. func init() {
  59. yamlFile, err := ioutil.ReadFile(YAMLFILE)
  60. if err != nil {
  61. log.Fatalln("load conf error")
  62. }
  63. err = yaml.Unmarshal(yamlFile, &YamlConfig)
  64. if err != nil {
  65. fmt.Println(err.Error())
  66. }
  67. log.Printf("%#v", YamlConfig)
  68. MixDataMgo = &util.MongodbSim{
  69. MongodbAddr: YamlConfig.MixdataMgoAddr,
  70. Size: YamlConfig.MongodbPoolSize,
  71. DbName: YamlConfig.DbName,
  72. UserName: YamlConfig.UserName,
  73. PassWord: YamlConfig.PassWord,
  74. }
  75. MixDataMgo.InitPool()
  76. data_mgo = &MongodbSim{
  77. MongodbAddr: "192.168.3.207:27092",
  78. DbName: "zhengkun",
  79. Size: 10,
  80. UserName: "",
  81. Password: "",
  82. }
  83. data_mgo.InitPool()
  84. Client_Es ,_= elastic.NewClient(http.DefaultClient, "http://192.168.3.11:9800")
  85. es_type, es_index = "azktest","azktest"
  86. }
  87. func main() {
  88. //测试
  89. temporaryTest()
  90. return
  91. if YamlConfig.IsAddTask==0{
  92. initSensitiveWordsData() //初始化敏感词数据
  93. }else {
  94. go addTaskSensitiveWordsData() //增量-改配置文件
  95. }
  96. lis, err := net.Listen("tcp", YamlConfig.Port)
  97. if err != nil {
  98. log.Fatalf("failed to listen: %v", err)
  99. }
  100. s := grpc.NewServer()
  101. proto_grpc.RegisterSensitiveWordsServer(s, &server{})
  102. log.Println("server start:", YamlConfig.Port)
  103. if err := s.Serve(lis); err != nil {
  104. log.Fatalf("failed to serve: %v", err)
  105. }
  106. }
  107. //协议方法---等
  108. type server struct {
  109. proto_grpc.SensitiveWordsServer
  110. }
  111. func (s *server) Search(ctx context.Context, in *proto_grpc.Request) (*proto_grpc.ResultSensitiveWords, error) {
  112. text := in.GetText()
  113. log.Println(text)
  114. findAll := Filter.FindAll(text)
  115. return &proto_grpc.ResultSensitiveWords{SensitiveWords: strings.Join(findAll, ",")}, nil
  116. }
  117. type YAMLConfig struct {
  118. MixdataMgoAddr string `yaml:"mixdataMgoAddr"`
  119. UserName string `yaml:"userName"`
  120. PassWord string `yaml:"passWord"`
  121. DbName string `yaml:"dbName"`
  122. MongodbPoolSize int `yaml:"mongodbPoolSize"`
  123. TaskGteId string `yaml:"taskGteId"`
  124. TaskLteId string `yaml:"taskLteId"`
  125. IsAddTask int `yaml:"isAddTask"`
  126. Port string `yaml:"port"`
  127. }
  128. //其他方法
  129. func StringTOBsonId(id string) primitive.ObjectID {
  130. objectId, _ := primitive.ObjectIDFromHex(id)
  131. return objectId
  132. }
  133. func BsonTOStringId(id interface{}) string {
  134. return id.(primitive.ObjectID).Hex()
  135. }
  136. func toMegaBytes(bytes uint64) float64 {
  137. return float64(bytes) / 1024 / 1024
  138. }
  139. func IntAll(num interface{}) int {
  140. return IntAllDef(num, 0)
  141. }
  142. func Int64All(num interface{}) int64 {
  143. if i, ok := num.(int64); ok {
  144. return int64(i)
  145. } else if i0, ok0 := num.(int32); ok0 {
  146. return int64(i0)
  147. } else if i1, ok1 := num.(float64); ok1 {
  148. return int64(i1)
  149. } else if i2, ok2 := num.(int); ok2 {
  150. return int64(i2)
  151. } else if i3, ok3 := num.(float32); ok3 {
  152. return int64(i3)
  153. } else if i4, ok4 := num.(string); ok4 {
  154. i64, _ := strconv.ParseInt(i4, 10, 64)
  155. //in, _ := strconv.Atoi(i4)
  156. return i64
  157. } else if i5, ok5 := num.(int16); ok5 {
  158. return int64(i5)
  159. } else if i6, ok6 := num.(int8); ok6 {
  160. return int64(i6)
  161. } else if i7, ok7 := num.(*big.Int); ok7 {
  162. in, _ := strconv.ParseInt(fmt.Sprint(i7), 10, 64)
  163. return int64(in)
  164. } else if i8, ok8 := num.(*big.Float); ok8 {
  165. in, _ := strconv.ParseInt(fmt.Sprint(i8), 10, 64)
  166. return int64(in)
  167. } else {
  168. return 0
  169. }
  170. }
  171. func Float64All(num interface{}) float64 {
  172. if i, ok := num.(float64); ok {
  173. return float64(i)
  174. } else if i0, ok0 := num.(int32); ok0 {
  175. return float64(i0)
  176. } else if i1, ok1 := num.(int64); ok1 {
  177. return float64(i1)
  178. } else if i2, ok2 := num.(int); ok2 {
  179. return float64(i2)
  180. } else if i3, ok3 := num.(float32); ok3 {
  181. return float64(i3)
  182. } else if i4, ok4 := num.(string); ok4 {
  183. in, _ := strconv.ParseFloat(i4, 64)
  184. return in
  185. } else if i5, ok5 := num.(int16); ok5 {
  186. return float64(i5)
  187. } else if i6, ok6 := num.(int8); ok6 {
  188. return float64(i6)
  189. } else if i6, ok6 := num.(uint); ok6 {
  190. return float64(i6)
  191. } else if i6, ok6 := num.(uint8); ok6 {
  192. return float64(i6)
  193. } else if i6, ok6 := num.(uint16); ok6 {
  194. return float64(i6)
  195. } else if i6, ok6 := num.(uint32); ok6 {
  196. return float64(i6)
  197. } else if i6, ok6 := num.(uint64); ok6 {
  198. return float64(i6)
  199. } else if i7, ok7 := num.(*big.Float); ok7 {
  200. in, _ := strconv.ParseFloat(fmt.Sprint(i7), 64)
  201. return float64(in)
  202. } else if i8, ok8 := num.(*big.Int); ok8 {
  203. in, _ := strconv.ParseFloat(fmt.Sprint(i8), 64)
  204. return float64(in)
  205. } else {
  206. return 0
  207. }
  208. }
  209. func IntAllDef(num interface{}, defaultNum int) int {
  210. if i, ok := num.(int); ok {
  211. return int(i)
  212. } else if i0, ok0 := num.(int32); ok0 {
  213. return int(i0)
  214. } else if i1, ok1 := num.(float64); ok1 {
  215. return int(i1)
  216. } else if i2, ok2 := num.(int64); ok2 {
  217. return int(i2)
  218. } else if i3, ok3 := num.(float32); ok3 {
  219. return int(i3)
  220. } else if i4, ok4 := num.(string); ok4 {
  221. in, _ := strconv.Atoi(i4)
  222. return int(in)
  223. } else if i5, ok5 := num.(int16); ok5 {
  224. return int(i5)
  225. } else if i6, ok6 := num.(int8); ok6 {
  226. return int(i6)
  227. } else if i7, ok7 := num.(*big.Int); ok7 {
  228. in, _ := strconv.Atoi(fmt.Sprint(i7))
  229. return int(in)
  230. } else if i8, ok8 := num.(*big.Float); ok8 {
  231. in, _ := strconv.Atoi(fmt.Sprint(i8))
  232. return int(in)
  233. } else {
  234. return defaultNum
  235. }
  236. }
  237. func ObjToString(old interface{}) string {
  238. if nil == old {
  239. return ""
  240. } else {
  241. r, _ := old.(string)
  242. return r
  243. }
  244. }
  245. func ObjToStringDef(old interface{}, defaultstr string) string {
  246. if nil == old {
  247. return defaultstr
  248. } else {
  249. r, _ := old.(string)
  250. if r == "" {
  251. return defaultstr
  252. }
  253. return r
  254. }
  255. }
  256. //对象数组转成string数组
  257. func ObjArrToStringArr(old []interface{}) []string {
  258. if old != nil {
  259. new := make([]string, len(old))
  260. for i, v := range old {
  261. new[i] = v.(string)
  262. }
  263. return new
  264. } else {
  265. return nil
  266. }
  267. }
  268. //对象数组转成map数组
  269. func ObjArrToMapArr(old []interface{}) []map[string]interface{} {
  270. if old != nil {
  271. new := make([]map[string]interface{}, len(old))
  272. for i, v := range old {
  273. new[i] = v.(map[string]interface{})
  274. }
  275. return new
  276. } else {
  277. return nil
  278. }
  279. }
  280. //map数组转成对象数组
  281. func MapArrToObjArr(old []map[string]interface{}) []interface{} {
  282. if old != nil {
  283. new := make([]interface{}, len(old))
  284. for i, v := range old {
  285. new[i] = v
  286. }
  287. return new
  288. } else {
  289. return nil
  290. }
  291. }