wcj 6 лет назад
Родитель
Коммит
cf915e5a6e
3 изменённых файла: 47 добавлений и 45 удалений
  1. 1 1
      src/jy/pretreated/analykv.go
  2. 36 36
      src/jy/pretreated/analystep.go
  3. 10 8
      src/jy/pretreated/division.go

+ 1 - 1
src/jy/pretreated/analykv.go

@@ -13,7 +13,7 @@ var Key = regexp.MustCompile("[:::]")
 var Time = regexp.MustCompile("[\\d]")
 var dh = regexp.MustCompile("[,,.]")
 var space = regexp.MustCompile("[\\s\\n \u3000\u2003\u00a0]+")
-var val = regexp.MustCompile("[^\\s\\n \u3000\u2003\u00a0,,。!;;]")
+var val = regexp.MustCompile("[^\\s\\n \u3000\u2003\u00a0,,。!;;\\-]")
 var matchkh = map[string]string{
 	"(": ")",
 	"(": ")",

+ 36 - 36
src/jy/pretreated/analystep.go

@@ -30,7 +30,7 @@ func AnalyStart(job *util.Job) {
 		}
 	}
 	blockArrays, _ := DivideBlock(job.CategorySecond, con, 1, job.RuleBlock) //分块
-	if len(blockArrays) > 0 { //有分块
+	if len(blockArrays) > 0 {                                                //有分块
 		//从块里面找分包
 		job.BlockPackage = FindPackageFromBlocks(&blockArrays, job.Title) //从块里面找分包
 		for _, bl := range blockArrays {
@@ -109,43 +109,43 @@ func FindProjectCode(newCon string, job *util.Job) {
 	var proCode string
 	blCode := &util.Block{}
 	/*		if newConTMP := projectcodeRegAll.FindString(newCon); newConTMP != "" { //项目名称项目编号一起的
-			//5d424bdfa5cb26b9b7ac7a85
-			//5d425a48a5cb26b9b7df5fec
-			//5d425506a5cb26b9b7cd2c3c
-			splitStr := strings.Split(newConTMP, " ")
-			if len(splitStr) >= 2 {
-				if utf8.RuneCountInString(splitStr[len(splitStr)-1]) > 5 {
-					newCon = "项目编号:" + splitStr[len(splitStr)-1]
-				} else if tmpstr := projectcodeRegAll2.FindString(splitStr[0]); tmpstr != "" {
-					//5d4253f3a5cb26b9b7ca2662
-					newCon = "项目编号:" + tmpstr
-				}
-			} else if len(splitStr) == 1 {
-				if tmpstr := projectcodeRegAll2.FindString(splitStr[0]); tmpstr != "" {
-					newCon = "项目编号:" + tmpstr
-				} else if strings.Contains(newConTMP, "、") {
-					tmpstrs := strings.Split(newCon, "、")
-					newCon = "项目编号:" + tmpstrs[len(tmpstrs)-1]
+				//5d424bdfa5cb26b9b7ac7a85
+				//5d425a48a5cb26b9b7df5fec
+				//5d425506a5cb26b9b7cd2c3c
+				splitStr := strings.Split(newConTMP, " ")
+				if len(splitStr) >= 2 {
+					if utf8.RuneCountInString(splitStr[len(splitStr)-1]) > 5 {
+						newCon = "项目编号:" + splitStr[len(splitStr)-1]
+					} else if tmpstr := projectcodeRegAll2.FindString(splitStr[0]); tmpstr != "" {
+						//5d4253f3a5cb26b9b7ca2662
+						newCon = "项目编号:" + tmpstr
+					}
+				} else if len(splitStr) == 1 {
+					if tmpstr := projectcodeRegAll2.FindString(splitStr[0]); tmpstr != "" {
+						newCon = "项目编号:" + tmpstr
+					} else if strings.Contains(newConTMP, "、") {
+						tmpstrs := strings.Split(newCon, "、")
+						newCon = "项目编号:" + tmpstrs[len(tmpstrs)-1]
+					}
 				}
 			}
-		}
-		proCode = projectcodeReg.FindString(newCon)
-		if proCode != "" {
-			ckv := GetKVAll(proCode, job.Title, nil, 1)
-			blCode.ColonKV = ckv
-			blCode.Text = proCode
-			job.Block = append(job.Block, blCode)
-		} else if proCode = projectcodeReg2.FindString(newCon); proCode != "" {
-			ckv := GetKVAll(proCode, job.Title, nil, 1)
-			blCode.ColonKV = ckv
-			blCode.Text = proCode
-			job.Block = append(job.Block, blCode)
-		} else if proCode = projectcodeReg3.FindString(newCon); proCode != "" {
-			ckv := GetKVAll(proCode, job.Title, nil, 1)
-			blCode.Text = proCode
-			blCode.ColonKV = ckv
-			job.Block = append(job.Block, blCode)
-		}*/
+			proCode = projectcodeReg.FindString(newCon)
+			if proCode != "" {
+				ckv := GetKVAll(proCode, job.Title, nil, 1)
+				blCode.ColonKV = ckv
+				blCode.Text = proCode
+				job.Block = append(job.Block, blCode)
+			} else if proCode = projectcodeReg2.FindString(newCon); proCode != "" {
+				ckv := GetKVAll(proCode, job.Title, nil, 1)
+				blCode.ColonKV = ckv
+				blCode.Text = proCode
+				job.Block = append(job.Block, blCode)
+			} else if proCode = projectcodeReg3.FindString(newCon); proCode != "" {
+				ckv := GetKVAll(proCode, job.Title, nil, 1)
+				blCode.Text = proCode
+				blCode.ColonKV = ckv
+				job.Block = append(job.Block, blCode)
+			}*/
 	if proCode = jsonReg.FindString(newCon); proCode != "" {
 		jsonMap := make(map[string]string)
 		json.Unmarshal([]byte(proCode), &jsonMap)

+ 10 - 8
src/jy/pretreated/division.go

@@ -158,6 +158,7 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock) ([]*ut
 		if k+1 != index {
 			if k == 0 {
 				returnValue = 3
+				break
 			} else {
 				if currentIndex+1 != index {
 					//如果序号不是连续的,不往下走
@@ -177,7 +178,6 @@ func DivideBlock(tp, content string, from int, ruleBlock *util.RuleBlock) ([]*ut
 			}
 			currentIndex = index
 		}
-
 		//
 		title := serialTitles[2]                         //标题
 		title = regTrimSpace.ReplaceAllString(title, "") //清除前后空格
@@ -428,6 +428,7 @@ func appendWarpStop(text string) string {
 	}
 	return text
 }
+
 //分段
 func DivideSegmentHtml(txt string) []*util.Segment {
 	//先分段
@@ -448,6 +449,7 @@ func DivideSegmentHtml(txt string) []*util.Segment {
 	}
 	return segs
 }
+
 //分段
 func DivideSegment(txt string) []*util.Segment {
 	//先分段
@@ -463,7 +465,7 @@ func DivideSegment(txt string) []*util.Segment {
 				tmpstr += fmt.Sprint(r)
 				return false
 			} else if tmpstr == fmt.Sprint(r) {
-				if r == 46 || r == 12289{
+				if r == 46 || r == 12289 {
 					tmpstr = ""
 				}
 				return false
@@ -473,7 +475,7 @@ func DivideSegment(txt string) []*util.Segment {
 				return true
 			}
 		}
-		tmpstr= ""
+		tmpstr = ""
 		return r == 10 || r == 13
 	})
 	//再去除空行
@@ -588,7 +590,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 			return false, ""
 		}
 		//
-		is := regexp.MustCompile(v[0] + "[::]*").FindAllStringIndex(con, -1)
+		is := regexp.MustCompile(v[0]+"[::]*").FindAllStringIndex(con, -1)
 		for _, sv := range is {
 			appendWarpIndex = append(appendWarpIndex, sv[0])
 		}
@@ -628,13 +630,13 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 			indexPkgMap[sv[0]] = v[0]
 		}
 		//key在包前面,并且在一行的开头
-		keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)" + pgflag).FindAllStringSubmatchIndex(con, -1)
+		keys := regexp.MustCompile("([\r\n]|^)([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::\\s\u3000\u2003\u00a0]+.*?)"+pgflag).FindAllStringSubmatchIndex(con, -1)
 		if len(keys) == 0 {
 			//key在包前面,并且key以冒号结尾
-			keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])" + pgflag).FindAllStringSubmatchIndex(con, -1)
+			keys = regexp.MustCompile("()([\u4e00-\u9fa5]{2,30}?([((].{1,8}?[))])?[::]+[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
 		}
 		if len(keys) == 0 {
-			keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])" + pgflag).FindAllStringSubmatchIndex(con, -1)
+			keys = regexp.MustCompile("()注[::]([\u4e00-\u9fa5]{2,8}?([((].{1,8}?[))])?[\\s\u3000\u2003\u00a0]*[\r\n])"+pgflag).FindAllStringSubmatchIndex(con, -1)
 		}
 		for _, key := range keys {
 			startEndMap[key[5]] = key[4]
@@ -688,7 +690,7 @@ func divisionPackageChild(blockPackage *map[string]*util.BlockPackage, content,
 			}
 			index := util.PackageNumberConvert(bk)
 			//去掉前缀,空格必须要加,分kv的时候要用
-			text = regexp.MustCompile(bv[0] + "[::]*").ReplaceAllString(text, "")
+			text = regexp.MustCompile(bv[0]+"[::]*").ReplaceAllString(text, "")
 			headKey := ""
 			if indexKeyStringMap[iv] != "" {
 				//if !filterPkgTitleKey.MatchString(indexKeyStringMap[iv]) {