sensitive.go 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. /*
  2. * Copyright (c) 2018
  3. * time: 6/24/18 3:22 PM
  4. * author: linhuanchao
  5. * e-mail: 873085747@qq.com
  6. */
  7. package Sensitive
  8. import (
  9. "io/ioutil"
  10. "os"
  11. "strings"
  12. )
  13. type SensitiveMap struct {
  14. sensitiveNode map[string]interface{}
  15. isEnd bool
  16. }
  17. var s *SensitiveMap
  18. func GetMap() *SensitiveMap {
  19. if s == nil {
  20. //currentPath, _ := filepath.Abs(filepath.Dir(os.Args[0]))
  21. //configure := config.GetConfig()
  22. //dictionaryPath := currentPath + configure.DictionaryPath
  23. dictionaryPath := "../Config/dictionary.txt"
  24. s = InitDictionary(s, dictionaryPath)
  25. }
  26. return s
  27. }
  28. /*
  29. 初始化敏感词词典结构体
  30. */
  31. func initSensitiveMap() *SensitiveMap {
  32. return &SensitiveMap{
  33. sensitiveNode: make(map[string]interface{}),
  34. isEnd: false,
  35. }
  36. }
  37. /*
  38. 读取词典文件
  39. */
  40. func readDictionary(path string) []string {
  41. file, err := os.Open(path)
  42. if err != nil {
  43. panic(err)
  44. }
  45. defer file.Close()
  46. str, err := ioutil.ReadAll(file)
  47. dictionary := strings.Fields(string(str))
  48. return dictionary
  49. }
  50. /*
  51. 初始化敏感词词典,根据DFA算法构建trie
  52. */
  53. func InitDictionary(s *SensitiveMap, dictionaryPath string) *SensitiveMap {
  54. s = initSensitiveMap()
  55. dictionary := readDictionary(dictionaryPath)
  56. for _, words := range dictionary {
  57. sMapTmp := s
  58. w := []rune(words)
  59. wordsLength := len(w)
  60. for i := 0; i < wordsLength; i++ {
  61. t := string(w[i])
  62. isEnd := false
  63. //如果是敏感词的最后一个字,则确定状态
  64. if i == (wordsLength - 1) {
  65. isEnd = true
  66. }
  67. func(tx string) {
  68. if _, ok := sMapTmp.sensitiveNode[tx]; !ok { //如果该字在该层级索引中找不到,则创建新的层级
  69. sMapTemp := new(SensitiveMap)
  70. sMapTemp.sensitiveNode = make(map[string]interface{})
  71. sMapTemp.isEnd = isEnd
  72. sMapTmp.sensitiveNode[tx] = sMapTemp
  73. }
  74. sMapTmp = sMapTmp.sensitiveNode[tx].(*SensitiveMap) //进入下一层级
  75. sMapTmp.isEnd = isEnd
  76. }(t)
  77. }
  78. }
  79. return s
  80. }
  81. /*
  82. 作用:检查是否含有敏感词,仅返回检查到的第一个敏感词
  83. 返回值:敏感词,是否含有敏感词
  84. */
  85. func (s *SensitiveMap) CheckSensitive(text string) (string, bool) {
  86. content := []rune(text)
  87. contentLength := len(content)
  88. result := false
  89. ta := ""
  90. for index := range content {
  91. sMapTmp := s
  92. target := ""
  93. in := index
  94. for {
  95. wo := string(content[in])
  96. target += wo
  97. if _, ok := sMapTmp.sensitiveNode[wo]; ok {
  98. if sMapTmp.sensitiveNode[wo].(*SensitiveMap).isEnd {
  99. result = true
  100. break
  101. }
  102. if in == contentLength-1 {
  103. break
  104. }
  105. sMapTmp = sMapTmp.sensitiveNode[wo].(*SensitiveMap) //进入下一层级
  106. in++
  107. } else {
  108. break
  109. }
  110. }
  111. if result {
  112. ta = target
  113. break
  114. }
  115. }
  116. return ta, result
  117. }
  118. /*
  119. 作用:返回文本中的所有敏感词
  120. 返回值:数组,格式为“["敏感词"][敏感词在检测文本中的索引位置,敏感词长度]”
  121. */
  122. type Target struct {
  123. Indexes []int
  124. Len int
  125. }
  126. func (s *SensitiveMap) FindAllSensitive(text string) map[string]*Target {
  127. content := []rune(text)
  128. contentLength := len(content)
  129. result := false
  130. ta := make(map[string]*Target)
  131. for index := range content {
  132. sMapTmp := s
  133. target := ""
  134. in := index
  135. result = false
  136. for {
  137. wo := string(content[in])
  138. target += wo
  139. if _, ok := sMapTmp.sensitiveNode[wo]; ok {
  140. if sMapTmp.sensitiveNode[wo].(*SensitiveMap).isEnd {
  141. result = true
  142. break
  143. }
  144. if in == contentLength-1 {
  145. break
  146. }
  147. sMapTmp = sMapTmp.sensitiveNode[wo].(*SensitiveMap) //进入下一层级
  148. in++
  149. } else {
  150. break
  151. }
  152. }
  153. if result {
  154. if _, targetInTa := ta[target]; targetInTa {
  155. ta[target].Indexes = append(ta[target].Indexes, index)
  156. } else {
  157. ta[target] = &Target{
  158. Indexes: []int{index},
  159. Len: len([]rune(target)),
  160. }
  161. }
  162. }
  163. }
  164. return ta
  165. }