6 years ago · db244f0095
--- a/src/github.com/dlclark/regexp2
+++ b/src/github.com/dlclark/regexp2
@@ -0,0 +1 @@
 
				+Subproject commit 902a5ce7a7812e2ba9f73b9d96c09d5136df39cd
			
--- a/src/github.com/dlclark/regexp2.zip
+++ b/src/github.com/dlclark/regexp2.zip
--- a/src/jy/clear/README.MD
+++ b/src/jy/clear/README.MD
@@ -0,0 +1,3 @@
 
				+**此目录用于放置N多的清理方法**
			
 
				+可以在配置中调用，
			
 
				+主要是收尾工作。
			
--- a/src/jy/clear/clear.go
+++ b/src/jy/clear/clear.go
@@ -0,0 +1,56 @@
 
				+package clear
			
 
				+
			
 
				+import (
			
 
				+	"fmt"
			
 
				+	"regexp"
			
 
				+	"sync"
			
 
				+)
			
 
				+
			
 
				+//方法清单
			
 
				+var clearfns = make(map[string]func(data []interface{}) []interface{})
			
 
				+var lock sync.RWMutex
			
 
				+
			
 
				+func init() {
			
 
				+	BindFn("cutspace", CutSpace)                 //去除首尾空格
			
 
				+	BindFn("cutallspace", CutAllSpace)           //去除所有空格
			
 
				+	BindFn("toint", ObjToInt)                    //转int
			
 
				+	BindFn("tofloat", ObjToFloat)                //转float
			
 
				+	BindFn("totimestamp", ObjToTimestamp)        //转时间戳
			
 
				+	BindFn("tomoney", ObjToMoney)                //转换金额
			
 
				+	BindFn("getcurrency", GetCurrency)           //获取币种
			
 
				+	BindFn("cutSymbol", CutSymbol)               //清理符号
			
 
				+	BindFn("cutNotPrs", CutNotPrs)               //不成对出现的符号，把符号后面的内容清理掉
			
 
				+	BindFn("rateToFloat", RateToFloat)           //费率转小数
			
 
				+	BindFn("clearAllWord", ClearAllWord)         //全部是汉字或者特殊符号的情况，清理掉
			
 
				+	BindFn("clearMaxAmount", ClearMaxAmount)     //大于1万亿的过滤掉
			
 
				+	BindFn("clearProjectName", ClearProjectName) //清理项目名称
			
 
				+	BindFn("getPhone", GetPhone)                 //取手机号
			
 
				+}
			
 
				+
			
 
				+//绑定清理方法
			
 
				+func BindFn(fnname string, fn func(data []interface{}) []interface{}) {
			
 
				+	lock.Lock()
			
 
				+	clearfns[fnname] = fn
			
 
				+	lock.Unlock()
			
 
				+}
			
 
				+
			
 
				+//执行清理动作，如果调用的清理方法不存在，则不做处理
			
 
				+func DoClearFn(clear []string, data []interface{}) []interface{} {
			
 
				+	if len(clear) == 0 {
			
 
				+		return data
			
 
				+	}
			
 
				+	for _, fnname := range clear {
			
 
				+		if v, ok := clearfns[fnname]; ok {
			
 
				+			data = v(data)
			
 
				+		}
			
 
				+	}
			
 
				+	return data
			
 
				+}
			
 
				+
			
 
				// PhoneReg matches one or more phone numbers (optional parenthesised area
				// code, digit groups joined by dash-like characters, optional "转" extension),
				// separated by list punctuation or whitespace.
				var PhoneReg = regexp.MustCompile("((([（(]\\d{3,4}[)）])?(\\d{6,12}([×―—－\\-]+\\d{3,4})?|\\d{3,4}[×―—－\\-]+[\u3000\u2003\u00a0\\s]*\\d{6,12}([×―—－\\-]+\\d{4})?|(\\d{2}[×―—－\\-])+\\d{8}[×―—－\\-](\\d{3}[、])+)(转\\d{3,4})?[或/、,，;；\u3000\u2003\u00a0\\s]*)+(\\d{3,})?)")

				// GetPhone replaces data[0] with the first PhoneReg match found in its
				// string form, or with "" when nothing matches. The slice is modified in
				// place and returned. An empty slice is returned untouched.
				func GetPhone(data []interface{}) []interface{} {
					if len(data) == 0 {
						return data
					}
					data[0] = PhoneReg.FindString(fmt.Sprint(data[0]))
					return data
				}
			
--- a/src/jy/clear/cutspace.go
+++ b/src/jy/clear/cutspace.go
@@ -0,0 +1,150 @@
 
				+package clear
			
 
				+
			
 
				+import (
			
 
				+	"fmt"
			
 
				+	"regexp"
			
 
				+	"strings"
			
 
				+)
			
 
				+
			
 
				var (
					// cutSpace matches whitespace at the start or at the end of a string.
					cutSpace = regexp.MustCompile(`^\s*|\s*$`)
					// cutAllSpace matches any (possibly empty) run of whitespace.
					cutAllSpace = regexp.MustCompile(`\s*`)
					// catSymbol is compiled in init; see the note there.
					catSymbol *regexp.Regexp
					// spaces lists wide/non-breaking space characters, which the \s class
					// of the regexp package (ASCII whitespace only) does not cover.
					spaces = []string{"\u3000", "\u2003", "\u00a0"}
				)

				func init() {
					// NOTE(review): as written this pattern is an unterminated character
					// class and does not compile, so the discarded error leaves catSymbol
					// nil. The class contents may have been lost — confirm the intended
					// pattern before using catSymbol.
					catSymbol, _ = regexp.Compile(`[]+`)
				}
			
 
				+
			
 
				// LableStr is a pattern for escaped HTML fragments.
				var LableStr = "&?(amp|nbsp|#8266);?|(&lt;).*?(&gt;?)"

				// at and ed delimit an HTML entity: it starts with '&' and ends with ';'.
				var at = rune('&')
				var ed = rune(';')

				// lableMap maps the HTML entities that CutLableStr understands to the
				// character each one stands for.
				var lableMap = map[string]rune{
					"&amp;":  rune('&'),
					"&nbsp;": rune(' '),
					"&gt;":   rune('>'),
					"&lt;":   rune('<'),
				}

				// CutLableStr decodes the HTML entities listed in lableMap. It makes up to
				// three passes so that multiply-escaped input such as "&amp;amp;" is fully
				// decoded, stopping early once a pass changes nothing.
				//
				// Text that starts with '&' but is not a known entity is kept verbatim,
				// including a run that is still open when the input ends.
				func CutLableStr(con string) string {
					for i := 0; i < 3; i++ {
						runes := make([]rune, 0, len(con))
						var pools []rune // candidate entity currently being collected
						bpool := false   // true while inside a candidate entity
						for _, s := range con {
							if !bpool && s == at {
								bpool = true
								pools = pools[:0]
							}
							if !bpool {
								runes = append(runes, s)
								continue
							}
							pools = append(pools, s)
							switch {
							case s == ed: // entity terminated
								if lb, ok := lableMap[string(pools)]; ok {
									runes = append(runes, lb)
								} else {
									runes = append(runes, pools...)
								}
								bpool = false
							case len(pools) > 6:
								// Longer than any known entity: give up and emit it as-is.
								runes = append(runes, pools...)
								bpool = false
							}
						}
						if bpool {
							// The input ended inside a candidate entity (e.g. "A&B"); keep
							// the collected characters instead of dropping them.
							runes = append(runes, pools...)
						}
						str1 := string(runes)
						if i > 0 && con == str1 {
							break
						}
						con = str1
					}
					return con
				}
			
 
				+
			
 
				+//清理开始、结尾的空白字符
			
 
				+func CutSpace(data []interface{}) []interface{} {
			
 
				+	tmp := cutSpace.ReplaceAllString(strings.Replace(fmt.Sprint(data[0]), "　", " ", -1), "")
			
 
				+	tmp = replaceSymbol(tmp, spaces)
			
 
				+	//fmt.Println("cutspace", tmp)
			
 
				+	data[0] = tmp
			
 
				+	return data
			
 
				+}
			
 
				+
			
 
				+//清理所有空白符
			
 
				+func CutAllSpace(data []interface{}) []interface{} {
			
 
				+	tmp := cutAllSpace.ReplaceAllString(fmt.Sprint(data[0]), "")
			
 
				+	tmp = replaceSymbol(tmp, spaces)
			
 
				+	data[0] = tmp
			
 
				+	return data
			
 
				+}
			
 
				+
			
 
				+//清理符号
			
 
				+func CutSymbol(data []interface{}) []interface{} {
			
 
				+	value := fmt.Sprint(CutSpace(data)[0])
			
 
				+	symbol := ",，;；:：'\"“”。.\\?？、/+=\\_—*&……\\^%$￥@!！`~·"
			
 
				+	startSymbol := "^[" + "）\\)>》】\\]}｝〕" + symbol + "]+"
			
 
				+	endSymbol := "[" + "（\\(<《【\\[{｛〔" + symbol + "]+$"
			
 
				+	startReg := regexp.MustCompile(startSymbol)
			
 
				+	endReg := regexp.MustCompile(endSymbol)
			
 
				+	value = startReg.ReplaceAllString(value, "")
			
 
				+	value = endReg.ReplaceAllString(value, "")
			
 
				+	value = fmt.Sprint(CutSpace([]interface{}{value, data[1]})[0])
			
 
				+	return []interface{}{value, data[1]}
			
 
				+}
			
 
				+
			
 
				// notPrsPair holds the patterns for one kind of opening and closing bracket.
				type notPrsPair struct {
					start, end *regexp.Regexp
				}

				var (
					// notPrsPairs lists the bracket kinds examined in order. All patterns are
					// compiled once here instead of on every call.
					//
					// NOTE(review): the fourth and fifth entries use the same pattern for
					// start and end, so their counts are always equal and they never cut
					// anything; angle brackets are only handled by the head/tail patterns
					// below. Possibly a single "[<《]" / "[>》]" pair was intended — confirm.
					notPrsPairs = []notPrsPair{
						{regexp.MustCompile("[（(]"), regexp.MustCompile("[）)]")},
						{regexp.MustCompile("[\\[【]"), regexp.MustCompile("[\\]】]")},
						{regexp.MustCompile("[{｛]"), regexp.MustCompile("[}｝]")},
						{regexp.MustCompile("[<《]"), regexp.MustCompile("[<《]")},
						{regexp.MustCompile("[>》]"), regexp.MustCompile("[>》]")},
						{regexp.MustCompile("〔"), regexp.MustCompile("〕")},
					}
					// notPrsTailReg matches an opening bracket that is never closed before
					// the end of the string, together with everything after it.
					notPrsTailReg = regexp.MustCompile("[（(\\[【{｛〔<《][^）)\\]】}｝〕>》]*$")
					// notPrsHeadReg matches everything up to and including a closing
					// bracket that has no opening bracket before it.
					notPrsHeadReg = regexp.MustCompile("^[^（(\\[【{｛〔<《]*[）)\\]】}｝〕>》]")
				)

				// CutNotPrs removes content around brackets that do not appear in matched
				// pairs in the string form of data[0], storing the result back into
				// data[0]. An empty slice is returned untouched.
				func CutNotPrs(data []interface{}) []interface{} {
					return childCutNotPrs(data, 1)
				}

				// childCutNotPrs is the depth-aware form of CutNotPrs; count is the current
				// recursion depth. data is returned unchanged when it is empty, when the
				// string form of data[0] is empty, or when count has reached 50.
				func childCutNotPrs(data []interface{}, count int) []interface{} {
					if len(data) == 0 {
						return data
					}
					value := fmt.Sprint(data[0])
					if count >= 50 || value == "" {
						return data
					}
					data[0] = cutUnpaired(value, count)
					return data
				}

				// cutUnpaired does the work of childCutNotPrs on a plain string, so the
				// recursion does not depend on any other element of the caller's slice.
				func cutUnpaired(value string, count int) string {
					if count >= 50 || value == "" {
						return value
					}
					for _, p := range notPrsPairs {
						sIndex := p.start.FindAllStringIndex(value, -1)
						eIndex := p.end.FindAllStringIndex(value, -1)
						switch sCount, eCount := len(sIndex), len(eIndex); {
						case sCount > eCount:
							// More openers than closers: drop everything up to and including
							// the opener at position eCount.
							value = value[sIndex[eCount][1]:]
						case sCount < eCount:
							// More closers than openers: drop everything from the closer at
							// position sCount onwards.
							value = value[:eIndex[sCount][0]]
						}
					}
					// Brackets of different kinds may still be crossed; strip a dangling
					// head/tail and re-run until stable (bounded by count).
					if notPrsTailReg.MatchString(value) || notPrsHeadReg.MatchString(value) {
						value = notPrsTailReg.ReplaceAllString(value, "")
						value = notPrsHeadReg.ReplaceAllString(value, "")
						value = cutUnpaired(value, count+1)
					}
					return value
				}
			
 
				+
			
 
				// allWordReg matches a string made up entirely of CJK characters
				// (U+4E00..U+9FA5), the listed punctuation/symbols and whitespace.
				var allWordReg = regexp.MustCompile("^[\u4e00-\u9fa5、,，.。?？'\"“”‘’·~！@#￥$%…&*（）()\\-—+=【】\\[\\]｛｝{}<>《》|\\/\\s]+$")

				// ClearAllWord replaces data[0] with "" when its string form consists only
				// of characters matched by allWordReg; otherwise data[0] becomes its string
				// form unchanged. An empty slice is returned untouched.
				//
				// The pattern is compiled once at package level rather than on every call.
				func ClearAllWord(data []interface{}) []interface{} {
					if len(data) == 0 {
						return data
					}
					data[0] = allWordReg.ReplaceAllString(fmt.Sprint(data[0]), "")
					return data
				}
			
--- a/src/jy/clear/getratecurrency.go
+++ b/src/jy/clear/getratecurrency.go
@@ -0,0 +1,32 @@
 
				+// getratecurrency
			
 
				+package clear
			
 
				+
			
 
				+import (
			
 
				+	"fmt"
			
 
				+	"regexp"
			
 
				+)
			
 
				+
			
 
				// currency matches a single currency marker. It is an alternation of the
				// keys of encyitem, so every match is a key that can be looked up directly.
				// (A character class such as `[$|＄|美元]+` would instead match arbitrary
				// runs of those characters, e.g. "$$" or a lone "元".)
				var currency = regexp.MustCompile(`\$|＄|美元`)

				// encyitem maps a currency marker to the currency name. To be extended.
				var encyitem = map[string]string{
					"$": "美元", "＄": "美元", "美元": "美元",
				}

				// GetCurrency replaces data[0] with the name of the currency mentioned in
				// its string form. The last recognised marker wins; when no marker is
				// present the result is "人民币". An empty slice is returned untouched.
				func GetCurrency(data []interface{}) []interface{} {
					if len(data) == 0 {
						return data
					}
					val := "人民币"
					for _, key := range currency.FindAllString(fmt.Sprint(data[0]), -1) {
						if v := encyitem[key]; v != "" {
							val = v
						}
					}
					data[0] = val
					return data
				}
			
--- a/src/jy/clear/projectname.go
+++ b/src/jy/clear/projectname.go
@@ -0,0 +1,36 @@
 
				+// projectname
			
 
				+package clear
			
 
				+
			
 
				+import (
			
 
				+	"regexp"
			
 
				+
			
 
				+	"github.com/dlclark/regexp2"
			
 
				+)
			
 
				+
			
 
				+//清理项目名称
			
 
				+
			
 
				+var clearPreRegNameCode = regexp.MustCompile(`([\(\)\-\[\]【】（）a-zA-Z0-9_—:：]{10,30})?(.+)?`)
			
 
				+var clearEndRegNameCode = regexp.MustCompile(`(.+?)?([\(\)\-\[\]【】（）a-zA-Z0-9_—:：]{8,100})$`)
			
 
				+var clearSymbol = regexp.MustCompile(`["“”]`)
			
 
				+var noclearNum = regexp2.MustCompile(`^(?!.+(?:标段|包|子项目|升级改造)[0-9１２３４５６７８９]{1,5})(.*)[0-9１２３４５６７８９]$`, regexp2.None)
			
 
				+var mustHan = regexp.MustCompile(`[\p{Han}]+`) //项目名称必须包含汉子
			
 
				+
			
 
				+func ClearProjectName(data []interface{}) []interface{} {
			
 
				+	value := clearPreRegNameCode.ReplaceAllString(CutSpace(data)[0].(string), "$2")
			
 
				+	value = clearEndRegNameCode.ReplaceAllString(value, "$1")
			
 
				+	b := mustHan.MatchString(value)
			
 
				+	if !b {
			
 
				+		value = ""
			
 
				+	}
			
 
				+	tmp := value
			
 
				+	for i := 0; i < 5; i++ {
			
 
				+		tmpval, _ := noclearNum.Replace(tmp, "$1", -1, -1)
			
 
				+		if tmpval == tmp {
			
 
				+			value = tmp
			
 
				+			break
			
 
				+		} else {
			
 
				+			tmp = tmpval
			
 
				+		}
			
 
				+	}
			
 
				+	return []interface{}{value, data[1]}
			
 
				+}
			
--- a/src/jy/clear/tonumber.go
+++ b/src/jy/clear/tonumber.go
@@ -0,0 +1,299 @@
 
				+// tonumber
			
 
				+package clear
			
 
				+
			
 
				+import (
			
 
				+	"fmt"
			
 
				+	"regexp"
			
 
				+	"strconv"
			
 
				+	"strings"
			
 
				+)
			
 
				+
			
 
				// Patterns used by the money conversion functions. They are compiled at
				// package initialisation with MustCompile so that a bad pattern is reported
				// instead of silently leaving a nil *Regexp behind.
				//
				// Inside a character class "|" is a literal, not an alternation, so the
				// classes below list their members directly.
				var (
					// contentUnit finds a "ten-thousand yuan" unit declaration in the full text.
					contentUnit = regexp.MustCompile(`(万元|单位/万)`)
					// regOperator matches operator symbols.
					// NOTE(review): pattern kept exactly as written; intent unclear from here.
					regOperator = regexp.MustCompile(`[*|+|)*)]`)
					// regNumFloat extracts an integer or decimal number.
					regNumFloat = regexp.MustCompile(`([1-9]\d*|0)(\.\d+)?`)
					// regStrUnit extracts a unit character.
					regStrUnit = regexp.MustCompile(`[元万亿]`)

					// moneyRegChar extracts a single character of a Chinese capital amount.
					// NOTE(review): moneyChar also has entries for 一..九 and 十, which this
					// class does not match — confirm whether that is intended.
					moneyRegChar = regexp.MustCompile(`[〇零点壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正]`)
					// numCapitals extracts a run of 4 to 40 capital-amount characters.
					numCapitals = regexp.MustCompile(`([〇零点壹贰叁肆伍陆柒捌玖拾百佰千仟万亿億元圆角分整正]{4,40})`)

					// regQianw matches amounts written with the "千万" unit.
					regQianw = regexp.MustCompile(`\d{1,2}千万`)
				)

				// moneyChar maps a Chinese numeral or unit character to its numeric value
				// ("点" maps to the string ".").
				var moneyChar = map[string]interface{}{ //"〇": "0", "零": "0",
					"一": float64(1), "壹": float64(1), "二": float64(2), "贰": float64(2), "三": float64(3), "叁": float64(3), "四": float64(4), "肆": float64(4), "五": float64(5), "伍": float64(5),
					"六": float64(6), "陆": float64(6), "七": float64(7), "柒": float64(7), "八": float64(8), "捌": float64(8), "九": float64(9), "玖": float64(9), "十": float64(10), "拾": float64(10),
					"百": float64(100), "佰": float64(100), "千": float64(1000), "仟": float64(1000), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
					"零": float64(0), "点": ".", "角": float64(0.1), "分": float64(0.01),
				}

				// moneyUnit maps a unit character to its multiplier.
				var moneyUnit = map[string]float64{
					"元": float64(1), "万": float64(10000), "亿": float64(100000000), "億": float64(100000000),
				}
			
 
				+
			
 
				// ObjToInt replaces data[0] with the int parsed from its string form, or
				// with 0 when it cannot be parsed (including out-of-range values). The
				// slice is modified in place and returned. An empty slice is returned
				// untouched.
				func ObjToInt(data []interface{}) []interface{} {
					if len(data) == 0 {
						return data
					}
					n, err := strconv.Atoi(fmt.Sprint(data[0]))
					if err != nil {
						// Atoi returns a clamped value on range errors; always report 0.
						n = 0
					}
					data[0] = n
					return data
				}
			
 
				+
			
 
				// ObjToFloat parses the string form of data[0] as a float64 rounded to four
				// decimal places and returns it in a new slice together with data[1]. When
				// parsing fails the value is float64(0).
				func ObjToFloat(data []interface{}) []interface{} {
					result := float64(0)
					if parsed, err := strconv.ParseFloat(fmt.Sprint(data[0]), 64); err == nil {
						// Round by formatting with four decimals and parsing back.
						fixed := strconv.FormatFloat(parsed, 'f', 4, 64)
						if rounded, err := strconv.ParseFloat(fixed, 64); err == nil {
							result = rounded
						}
					}
					return []interface{}{result, data[1]}
				}
			
 
				+
			
 
				+//金额转换
			
 
				+func ObjToMoney(data []interface{}) []interface{} {
			
 
				+	isfindUnit := true
			
 
				+	ret := capitalMoney(data)[0]
			
 
				+	if ret.(float64) < float64(10000) || ret.(float64) > float64(50000000000) {
			
 
				+		ret2, b := numMoney(data)
			
 
				+		isfindUnit = b
			
 
				+		if ret2[0].(float64) > ret.(float64) {
			
 
				+			ret = ret2[0]
			
 
				+		}
			
 
				+	}
			
 
				+	f, _ := strconv.ParseFloat(strconv.FormatFloat(ret.(float64), 'f', 4, 64), 64)
			
 
				+	if f < 1 {
			
 
				+		f = 0
			
 
				+	}
			
 
				+	//若果金额小于50，全文检索单位：万
			
 
				+	if f < 50 && f > 0 && isfindUnit {
			
 
				+		rep := contentUnit.FindAllStringIndex(fmt.Sprint(data[1]), -1)
			
 
				+		if len(rep) > 0 {
			
 
				+			f = f * 10000
			
 
				+		}
			
 
				+	}
			
 
				+	data[0] = f
			
 
				+	return data
			
 
				+}
			
 
				+
			
 
				+//数字金额转换
			
 
				+func numMoney(data []interface{}) ([]interface{}, bool) {
			
 
				+	tmp := fmt.Sprint(data[0])
			
 
				+	repUnit := float64(1)
			
 
				+	if regQianw.MatchString(tmp) {
			
 
				+		tmp = strings.Replace(tmp, "千万", "万", -1)
			
 
				+		repUnit = float64(1000)
			
 
				+	}
			
 
				+	tmp = replaceSymbol(tmp, []string{",", "，", "（", "）", "(", ")", "：", "\n"})
			
 
				+	tmp = replaceString(tmp, []string{"万元", "亿元", "．"}, []string{"万", "亿", "."})
			
 
				+	tmp = fmt.Sprint(CutAllSpace([]interface{}{tmp, data[1]})[0])
			
 
				+	rets := regNumFloat.FindAllString(tmp, -1)
			
 
				+	fnums := []float64{}
			
 
				+	unitstrs := []string{}
			
 
				+	if len(rets) > 0 {
			
 
				+		pindex := 0 //单位前置
			
 
				+		for k, v := range rets {
			
 
				+			f, err := strconv.ParseFloat(v, 64)
			
 
				+			if err == nil {
			
 
				+				fnums = append(fnums, f)
			
 
				+				index := strings.Index(tmp, v)
			
 
				+				//单位后置
			
 
				+				start := index + len(v)
			
 
				+				end := start + 3
			
 
				+				//log.Println("vvv", tmp, v, pindex, index, start)
			
 
				+				if k > 0 {
			
 
				+					if start >= pindex+3 {
			
 
				+						pstart := pindex + 3
			
 
				+						if pstart >= index {
			
 
				+							pstart = index
			
 
				+						}
			
 
				+						if len(tmp) > end {
			
 
				+							unitstrs = append(unitstrs, tmp[pstart:index]+tmp[start:end])
			
 
				+						} else {
			
 
				+							unitstrs = append(unitstrs, tmp[pstart:index]+tmp[start:])
			
 
				+						}
			
 
				+					} else {
			
 
				+						if len(tmp) > end {
			
 
				+							unitstrs = append(unitstrs, tmp[start:end])
			
 
				+						} else {
			
 
				+							unitstrs = append(unitstrs, tmp[start:])
			
 
				+						}
			
 
				+					}
			
 
				+				} else {
			
 
				+					if len(tmp) > end {
			
 
				+						if index-3 >= 0 {
			
 
				+							unitstrs = append(unitstrs, tmp[index-3:index]+tmp[start:end])
			
 
				+						} else {
			
 
				+							unitstrs = append(unitstrs, tmp[start:end])
			
 
				+						}
			
 
				+					} else {
			
 
				+						if index-3 >= 0 {
			
 
				+							unitstrs = append(unitstrs, tmp[index-3:index]+tmp[start:])
			
 
				+						} else {
			
 
				+							unitstrs = append(unitstrs, tmp[start:])
			
 
				+						}
			
 
				+					}
			
 
				+				}
			
 
				+				pindex = start
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+	//log.Println("unitstrs", fnums, unitstrs)
			
 
				+	unit := float64(0)
			
 
				+	fnum := float64(0)
			
 
				+	for k, v := range fnums {
			
 
				+		fnum = v
			
 
				+		units := regStrUnit.FindAllString(unitstrs[k], -1)
			
 
				+		for _, v := range units {
			
 
				+			if moneyUnit[v] != 0 {
			
 
				+				unit = moneyUnit[v]
			
 
				+				break
			
 
				+			}
			
 
				+		}
			
 
				+		if unit != float64(0) { //取第一个
			
 
				+			break
			
 
				+		}
			
 
				+	}
			
 
				+	fnum = fnum * repUnit
			
 
				+	if unit == float64(0) {
			
 
				+		data[0] = fnum
			
 
				+	} else {
			
 
				+		data[0] = fnum * unit
			
 
				+	}
			
 
				+	if unit == 10000 {
			
 
				+		return data, false
			
 
				+	} else {
			
 
				+		return data, true
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+//大写数子金额转换
			
 
				+func capitalMoney(data []interface{}) []interface{} {
			
 
				+	nodes := []float64{}
			
 
				+	node := float64(0)
			
 
				+	tmp := float64(0)
			
 
				+	decimals := 0.0
			
 
				+	ishaspoint := false //是否含小数点
			
 
				+	fnum := float64(0)
			
 
				+	end := false
			
 
				+	str := fmt.Sprint(data[0])
			
 
				+	//提取第一个大写信息
			
 
				+	strmatch := numCapitals.FindAllStringSubmatch(str, -1)
			
 
				+	if len(strmatch) > 0 {
			
 
				+		str = strmatch[0][0]
			
 
				+	}
			
 
				+	//修正单位类似：捌万伍仟肆佰捌拾贰万元整
			
 
				+	if strings.Contains(str, "万元") {
			
 
				+		str = strings.Replace(str, "万元", "#B#", -1)
			
 
				+		str = strings.Replace(str, "万", "亿", -1)
			
 
				+		str = strings.Replace(str, "#B#", "万元", -1)
			
 
				+	}
			
 
				+	moneyRegChar.ReplaceAllStringFunc(str, func(key string) string {
			
 
				+		if key == "元" || key == "圆" || key == "点" {
			
 
				+			ishaspoint = true
			
 
				+		}
			
 
				+		if v, ok := moneyChar[key].(float64); ok && !end {
			
 
				+			if ishaspoint && v > 10 { //排除后面有其他的单位
			
 
				+				return ""
			
 
				+			}
			
 
				+			//fmt.Println(key, v, fnum)
			
 
				+			if v < 10 && v >= 0 {
			
 
				+				if ishaspoint { //小数部分
			
 
				+					if v >= 1 {
			
 
				+						fnum = v
			
 
				+					} else if v < 1 && v > 0 {
			
 
				+						decimals += fnum * v
			
 
				+					}
			
 
				+				} else {
			
 
				+					if tmp != float64(0) {
			
 
				+						node += tmp
			
 
				+					}
			
 
				+					tmp = float64(v)
			
 
				+				}
			
 
				+			} else if v == 10000 || v == 100000000 { //单位万、亿
			
 
				+				if tmp != float64(0) {
			
 
				+					node += tmp
			
 
				+					tmp = float64(0)
			
 
				+				}
			
 
				+				nodes = append(nodes, node*float64(v))
			
 
				+				node = float64(0)
			
 
				+			} else {
			
 
				+				if v == 10 && tmp == 0 {
			
 
				+					tmp = 1
			
 
				+				}
			
 
				+				tmp = tmp * float64(v)
			
 
				+				node += tmp
			
 
				+				tmp = float64(0)
			
 
				+			}
			
 
				+		}
			
 
				+		if key == "整" || key == "正" || key == "分" {
			
 
				+			end = true
			
 
				+		}
			
 
				+		return ""
			
 
				+	})
			
 
				+	nodes = append(nodes, node, tmp)
			
 
				+	ret := float64(0)
			
 
				+	for _, v := range nodes {
			
 
				+		ret += v
			
 
				+	}
			
 
				+	return []interface{}{ret + decimals, data[1]}
			
 
				+}
			
 
				+
			
 
				// replaceSymbol removes every occurrence of each string in rep from con,
				// processing rep in order.
				func replaceSymbol(con string, rep []string) string {
					for i := 0; i < len(rep); i++ {
						con = strings.Replace(con, rep[i], "", -1)
					}
					return con
				}
			
 
				+
			
 
				// replaceString replaces, in order, every occurrence of ret[i] in con with
				// rep[i]. Entries of ret that have no counterpart in rep are ignored.
				func replaceString(con string, ret, rep []string) string {
					pairs := len(ret)
					if len(rep) < pairs {
						pairs = len(rep)
					}
					for i := 0; i < pairs; i++ {
						con = strings.Replace(con, ret[i], rep[i], -1)
					}
					return con
				}
			
 
				+
			
 
				+//费率转小数
			
 
				+func RateToFloat(con []interface{}) []interface{} {
			
 
				+	tmp := fmt.Sprint(CutAllSpace(con)[0])
			
 
				+	if strings.Contains(tmp, "%") || strings.Contains(tmp, "％") {
			
 
				+		tmp = strings.Replace(tmp, "%", "", -1)
			
 
				+		tmp = strings.Replace(tmp, "％", "", -1)
			
 
				+		rep := ObjToFloat([]interface{}{tmp, con[1]})[0]
			
 
				+		con[0] = rep.(float64) / 100
			
 
				+		return con
			
 
				+	} else {
			
 
				+		return ObjToFloat([]interface{}{tmp, con[1]})
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				// ClearMaxAmount resets data[0] to float64(0) when it holds a float64 of
				// one trillion (1000000000000) or more. Any other value is left untouched.
				func ClearMaxAmount(data []interface{}) []interface{} {
					if amount, ok := data[0].(float64); ok && amount >= 1000000000000 {
						data[0] = float64(0)
					}
					return data
				}
			
--- a/src/jy/clear/totimestamp.go
+++ b/src/jy/clear/totimestamp.go
@@ -0,0 +1,131 @@
 
				+// totimestamp
			
 
				+package clear
			
 
				+
			
 
				+import (
			
 
				+	"fmt"
			
 
				+	"regexp"
			
 
				+	"strings"
			
 
				+	"time"
			
 
				+)
			
 
				+
			
 
				// Patterns used by the timestamp conversion. They are compiled at package
				// initialisation with MustCompile so that a bad pattern is reported instead
				// of silently leaving a nil *Regexp behind.
				var (
					// reg extracts runs of ASCII digits.
					reg = regexp.MustCompile(`\d+`)
					// regA matches a single Chinese numeral or full-width digit. Inside a
					// character class "|" is a literal, so the members are listed directly.
					regA = regexp.MustCompile(`[一二三四五六七八九十零〇１２３４５６７８９０]`)
					// regB recognises a date followed by a time range, such as
					// 2016年12月7日上午9:00-11：30时.
					regB = regexp.MustCompile(`\d+年\d+月\d+日((上|下)午)?\s*\d+[:：时]\d+分?[-—]\d+[:：时]\d+时?分?`)
					// regC matches the start of such a range, up to and including the dash.
					regC = regexp.MustCompile(`\s*\d+[:：时]\d+分?[-—]`)
					// regAfter detects an afternoon ("下午") time.
					// NOTE(review): "D?" matches a literal letter D; `\D?` (one optional
					// non-digit) may have been intended — confirm before changing.
					regAfter = regexp.MustCompile(`(下午D?\d{1,2}[时|:|：|h|H])`)
				)

				const (
					// T is the number of seconds in 365 days.
					T = 365 * 86400
				)

				// item maps Chinese numerals and full-width digits to ASCII digits.
				// NOTE(review): "十" maps to "10" one character at a time, so "十二" becomes
				// "102" — confirm whether composite numerals need handling.
				var item = map[string]string{
					"一": "1", "二": "2", "三": "3", "四": "4", "五": "5",
					"六": "6", "七": "7", "八": "8", "九": "9", "十": "10", "零": "0", "〇": "0",
					"１": "1", "２": "2", "３": "3", "４": "4", "５": "5",
					"６": "6", "７": "7", "８": "8", "９": "9", "０": "0",
				}
			
 
				+
			
 
				+/*字符时间转时间戳
			
 
				+支持全角
			
 
				+20060102->时间戳
			
 
				+20060102150405->时间戳
			
 
				+01%02->时间戳
			
 
				+2006%01%02->时间戳
			
 
				+2006%01%02%15->时间戳
			
 
				+2006%01%02%15%04->时间戳
			
 
				+2006%01%02%15%04%05->时间戳
			
 
				+*/
			
 
				+func ObjToTimestamp(data []interface{}) []interface{} {
			
 
				+	tmp := fmt.Sprint(data[0])
			
 
				+	//2016年12月7日上午9:00-11：30时 时间范围处理 取后面的时间
			
 
				+	if regB.MatchString(tmp) {
			
 
				+		tmp = regC.ReplaceAllString(tmp, "")
			
 
				+	}
			
 
				+	//2017年11月13日下午3时30分
			
 
				+	addreptime := int64(0)
			
 
				+	if regAfter.MatchString(tmp) {
			
 
				+		addreptime = 12 * 60 * 60
			
 
				+	}
			
 
				+	regRepl, _ := regexp.Compile(`[,，]`)
			
 
				+	tmp = regRepl.ReplaceAllString(tmp, "")
			
 
				+	for _, v := range spaces {
			
 
				+		strings.Replace(tmp, v, " ", -1)
			
 
				+	}
			
 
				+	tmps := reg.FindAllString(chineseToNumber(tmp), -1)
			
 
				+	//处理类似2016-12-0909:30:00时间
			
 
				+	if len(tmps) > 2 && len(tmps[2]) > 2 {
			
 
				+		newtmp := []string{}
			
 
				+		for k, v := range tmps {
			
 
				+			if k == 2 {
			
 
				+				newtmp = append(newtmp, v[0:2], v[2:])
			
 
				+			} else {
			
 
				+				newtmp = append(newtmp, v)
			
 
				+			}
			
 
				+		}
			
 
				+		tmps = newtmp
			
 
				+	}
			
 
				+	timestr := "" //2006-01-02 15:04:05
			
 
				+	timestamp := int64(0)
			
 
				+	if len(tmps) == 1 {
			
 
				+		if len(tmps[0]) == 8 {
			
 
				+			timestr = tmps[0][0:4] + "-" + tmps[0][4:6] + "-" + tmps[0][6:8]
			
 
				+			t, _ := time.ParseInLocation("2006-01-02-15-04", timestr+"-09-00", time.Local)
			
 
				+			timestamp = t.Unix()
			
 
				+		} else if len(tmps[0]) == 14 {
			
 
				+			timestr = tmps[0][0:4] + "-" + tmps[0][4:6] + "-" + tmps[0][6:8] + " " + tmps[0][8:10] + ":" + tmps[0][10:12] + ":" + tmps[0][12:14]
			
 
				+			t, _ := time.ParseInLocation("2006-01-02 15:04:00", timestr, time.Local)
			
 
				+			timestamp = t.Unix()
			
 
				+		}
			
 
				+	} else if len(tmps) == 2 {
			
 
				+		timestr = fmt.Sprint(time.Now().Year()) + "-" + MDhmsRepair(tmps[0]) + "-" + MDhmsRepair(tmps[1])
			
 
				+		t, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
			
 
				+		timestamp = t.Unix()
			
 
				+	} else if len(tmps) == 3 {
			
 
				+		timestr = tmps[0] + "-" + MDhmsRepair(tmps[1]) + "-" + MDhmsRepair(tmps[2])
			
 
				+		t, _ := time.ParseInLocation("2006-01-02", timestr, time.Local)
			
 
				+		timestamp = t.Unix()
			
 
				+	} else if len(tmps) == 4 {
			
 
				+		timestr = tmps[0] + "-" + MDhmsRepair(tmps[1]) + "-" + MDhmsRepair(tmps[2]) + " " + MDhmsRepair(tmps[3])
			
 
				+		t, _ := time.ParseInLocation("2006-01-02 15", timestr, time.Local)
			
 
				+		timestamp = t.Unix()
			
 
				+	} else if len(tmps) >= 5 {
			
 
				+		timestr = tmps[0] + "-" + MDhmsRepair(tmps[1]) + "-" + MDhmsRepair(tmps[2]) + " " + MDhmsRepair(tmps[3]) + ":" + MDhmsRepair(tmps[4])
			
 
				+		t, _ := time.ParseInLocation("2006-01-02 15:04", timestr, time.Local)
			
 
				+		timestamp = t.Unix()
			
 
				+	}
			
 
				+	if timestamp < 0 || timestamp > (time.Now().Unix()+T) {
			
 
				+		data[0] = 0
			
 
				+	} else {
			
 
				+		if addreptime > 0 {
			
 
				+			timestamp += addreptime
			
 
				+		}
			
 
				+		data[0] = timestamp
			
 
				+	}
			
 
				+	return data
			
 
				+}
			
 
				+
			
 
				// MDhmsRepair left-pads a one-byte string with "0" (e.g. "7" becomes "07");
				// strings of any other length are returned unchanged.
				func MDhmsRepair(t string) string {
					if len(t) != 1 {
						return t
					}
					return "0" + t
				}
			
 
				+
			
 
				+//汉子数和全角转数字
			
 
				+func chineseToNumber(con string) string {
			
 
				+	tmp := regA.ReplaceAllStringFunc(con, func(key string) string {
			
 
				+		if item[key] != "" {
			
 
				+			return item[key]
			
 
				+		} else {
			
 
				+			return key
			
 
				+		}
			
 
				+		return key
			
 
				+	})
			
 
				+	return tmp
			
 
				+}
			
--- a/src/jy/extract/extract.go
+++ b/src/jy/extract/extract.go
@@ -2,6 +2,7 @@ package extract
 
				 
			
 
				 import (
			
 
				 	"encoding/json"
			
 
				+	"jy/clear"
			
 
				 	db "jy/mongodbutil"
			
 
				 	"jy/pretreated"
			
 
				 	ju "jy/util"
			
@@ -37,6 +38,7 @@ func StartExtractTaskId(taskId string) bool {
 
				 		ext.InitRuleBacks()
			
 
				 		ext.InitRuleCore()
			
 
				 		ext.InitTag()
			
 
				+		ext.InitClearFn()
			
 
				 		//只启动一次taskId
			
 
				 		go RunExtractTask(ext)
			
 
				 	}
			
@@ -76,8 +78,8 @@ func RunExtractTask(ext *ExtractTask) {
 
				 //信息预处理
			
 
				 func PreInfo(doc map[string]interface{}) *ju.Job {
			
 
				 	detail := ""
			
 
				-	d1 := doc["detail"].(string)
			
 
				-	d2 := doc["contenthtml"].(string)
			
 
				+	d1, _ := doc["detail"].(string)
			
 
				+	d2, _ := doc["contenthtml"].(string)
			
 
				 	if len(d1) >= len(d2) || d2 == "" {
			
 
				 		detail = d1
			
 
				 	} else {
			
@@ -126,7 +128,7 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
 
				 	qu.Catch()
			
 
				 	qu.Try(func() {
			
 
				 		doc := *j.Data
			
 
				-		//前置规则,结果覆盖doc属性
			
 
				+		//全局前置规则,结果覆盖doc属性
			
 
				 		for _, v := range e.RulePres {
			
 
				 			doc = ExtRegPre(doc, j, v, e.TaskInfo)
			
 
				 		}
			
@@ -147,7 +149,7 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
 
				 
			
 
				 			//抽取-规则
			
 
				 			for _, v := range vc.RuleCores {
			
 
				-				ExtRegCore(tmp, j, v, e)
			
 
				+				ExtRegCore(vc.ExtFrom, tmp, j, v, e)
			
 
				 			}
			
 
				 			//log.Println("抽取-规则", tmp)
			
 
				 
			
@@ -161,10 +163,18 @@ func (e *ExtractTask) ExtractProcess(j *ju.Job) {
 
				 			for _, v := range e.RuleBacks {
			
 
				 				ExtRegBack(j, v, e.TaskInfo)
			
 
				 			}
			
 
				-			bs, _ := json.Marshal(j.Result)
			
 
				-			log.Println("抽取结果", string(bs))
			
 
				 		}
			
 
				-		//抽取结果保存 todo
			
 
				+		//函数清理
			
 
				+		for key, val := range j.Result {
			
 
				+			for _, v := range val {
			
 
				+				data := clear.DoClearFn(e.ClearFn[key], []interface{}{v.Value, j.Content})
			
 
				+				v.Value = data[0]
			
 
				+			}
			
 
				+		}
			
 
				+		bs, _ := json.Marshal(j.Result)
			
 
				+		log.Println("抽取结果", j.SourceMid, string(bs))
			
 
				+		//分析抽取结果并保存 todo
			
 
				+		AnalysisSaveResult(j.Data, j.Result, e.TaskInfo.SaveColl)
			
 
				 
			
 
				 	}, func(err interface{}) {
			
 
				 		log.Println(err)
			
@@ -198,11 +208,13 @@ func ExtRegPre(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, t *TaskInf
 
				 }
			
 
				 
			
 
				 //抽取-规则
			
 
				-func ExtRegCore(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) {
			
 
				+func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *ExtractTask) {
			
 
				 	if in.IsLua {
			
 
				 		lua := ju.LuaScript{Code: in.Code, Name: in.Name, Doc: doc, Script: in.RuleText}
			
 
				 		if in.IsHasFields { //lua脚本配置有属性字段
			
 
				 			lua.KvMap = getKvByLuaFields(j, in, et.Tag)
			
 
				+		} else {
			
 
				+			lua.KvMap = map[string][]map[string]interface{}{}
			
 
				 		}
			
 
				 		lua.Block = j.Block
			
 
				 		extinfo := lua.RunScript("core")
			
@@ -212,17 +224,22 @@ func ExtRegCore(doc map[string]interface{}, j *ju.Job, in *RegLuaInfo, et *Extra
 
				 			}
			
 
				 			if tmps, ok := v.([]map[string]interface{}); ok {
			
 
				 				for _, tmp := range tmps {
			
 
				-					j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["key"]), qu.ObjToString(tmp["type"]), tmp["value"]})
			
 
				+					j.Result[k] = append(j.Result[k],
			
 
				+						&ju.ExtField{k, qu.ObjToString(tmp["key"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), extfrom, tmp["value"]})
			
 
				 				}
			
 
				 			}
			
 
				 		}
			
 
				-		AddExtLog(j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
			
 
				+		if len(extinfo) > 0 {
			
 
				+			AddExtLog(j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
			
 
				+		}
			
 
				 	} else {
			
 
				+		//全文正则
			
 
				+		text := qu.ObjToString(doc[extfrom])
			
 
				 		if in.Field != "" {
			
 
				-			//全文正则
			
 
				-			text := qu.ObjToString(doc["detail"])
			
 
				-			extinfo := extRegCoreToResult(text, j, in)
			
 
				-			AddExtLog(j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
			
 
				+			extinfo := extRegCoreToResult(extfrom, text, j, in)
			
 
				+			if len(extinfo) > 0 {
			
 
				+				AddExtLog(j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 }
			
@@ -240,31 +257,65 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
 
				 				kvs2 := bl.ColonKV.Kvs_2
			
 
				 				for _, tag := range tags {
			
 
				 					for _, kv := range kvs {
			
 
				-						if kv.Key == tag.Key {
			
 
				-							text := ju.TrimLRSpace(kv.Value, "")
			
 
				-							if text != "" {
			
 
				-								kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				-									"value": text,
			
 
				-									"type":  "colon1",
			
 
				-									"field": field,
			
 
				-									"key":   tag.Key,
			
 
				-								})
			
 
				+						if tag.Type == "string" {
			
 
				+							if kv.Key == tag.Key {
			
 
				+								text := ju.TrimLRSpace(kv.Value, "")
			
 
				+								if text != "" {
			
 
				+									kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				+										"value":     text,
			
 
				+										"type":      "colon1",
			
 
				+										"field":     field,
			
 
				+										"key":       tag.Key,
			
 
				+										"matchtype": "string",
			
 
				+									})
			
 
				+								}
			
 
				+								break
			
 
				+							}
			
 
				+						} else if tag.Type == "regexp" {
			
 
				+							if tag.Reg.MatchString(kv.Key) {
			
 
				+								text := ju.TrimLRSpace(kv.Value, "")
			
 
				+								if text != "" {
			
 
				+									kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				+										"value":     text,
			
 
				+										"type":      "colon1",
			
 
				+										"field":     field,
			
 
				+										"key":       tag.Key,
			
 
				+										"matchtype": "regexp",
			
 
				+									})
			
 
				+								}
			
 
				+								break
			
 
				 							}
			
 
				-							break
			
 
				 						}
			
 
				 					}
			
 
				 					for _, kv := range kvs2 {
			
 
				-						if kv.Key == tag.Key {
			
 
				-							text := ju.TrimLRSpace(kv.Value, "")
			
 
				-							if text != "" {
			
 
				-								kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				-									"value": text,
			
 
				-									"type":  "colon2",
			
 
				-									"field": field,
			
 
				-									"key":   tag.Key,
			
 
				-								})
			
 
				+						if tag.Type == "string" {
			
 
				+							if kv.Key == tag.Key {
			
 
				+								text := ju.TrimLRSpace(kv.Value, "")
			
 
				+								if text != "" {
			
 
				+									kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				+										"value":     text,
			
 
				+										"type":      "colon2",
			
 
				+										"field":     field,
			
 
				+										"key":       tag.Key,
			
 
				+										"matchtype": "string",
			
 
				+									})
			
 
				+								}
			
 
				+								break
			
 
				+							}
			
 
				+						} else if tag.Type == "regexp" {
			
 
				+							if tag.Reg.MatchString(kv.Key) {
			
 
				+								text := ju.TrimLRSpace(kv.Value, "")
			
 
				+								if text != "" {
			
 
				+									kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				+										"value":     text,
			
 
				+										"type":      "colon2",
			
 
				+										"field":     field,
			
 
				+										"key":       tag.Key,
			
 
				+										"matchtype": "regexp",
			
 
				+									})
			
 
				+								}
			
 
				+								break
			
 
				 							}
			
 
				-							break
			
 
				 						}
			
 
				 					}
			
 
				 				}
			
@@ -274,17 +325,34 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
 
				 				kvs := bl.SpaceKV.Kvs
			
 
				 				for _, tag := range tags {
			
 
				 					for _, kv := range kvs {
			
 
				-						if kv.Key == tag.Key {
			
 
				-							text := ju.TrimLRSpace(kv.Value, "")
			
 
				-							if text != "" {
			
 
				-								kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				-									"value": text,
			
 
				-									"type":  "space",
			
 
				-									"field": field,
			
 
				-									"key":   tag.Key,
			
 
				-								})
			
 
				+						if tag.Type == "string" {
			
 
				+							if kv.Key == tag.Key {
			
 
				+								text := ju.TrimLRSpace(kv.Value, "")
			
 
				+								if text != "" {
			
 
				+									kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				+										"value":     text,
			
 
				+										"type":      "space",
			
 
				+										"field":     field,
			
 
				+										"key":       tag.Key,
			
 
				+										"matchtype": "string",
			
 
				+									})
			
 
				+								}
			
 
				+								break
			
 
				+							}
			
 
				+						} else if tag.Type == "regexp" {
			
 
				+							if tag.Reg.MatchString(kv.Key) {
			
 
				+								text := ju.TrimLRSpace(kv.Value, "")
			
 
				+								if text != "" {
			
 
				+									kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				+										"value":     text,
			
 
				+										"type":      "space",
			
 
				+										"field":     field,
			
 
				+										"key":       tag.Key,
			
 
				+										"matchtype": "regexp",
			
 
				+									})
			
 
				+								}
			
 
				+								break
			
 
				 							}
			
 
				-							break
			
 
				 						}
			
 
				 					}
			
 
				 				}
			
@@ -294,17 +362,34 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
 
				 				kv := bl.TableKV.Kv
			
 
				 				for _, tag := range tags {
			
 
				 					for k, val := range kv {
			
 
				-						if k == tag.Key {
			
 
				-							text := ju.TrimLRSpace(val, "")
			
 
				-							if text != "" {
			
 
				-								kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				-									"value": text,
			
 
				-									"type":  "table",
			
 
				-									"field": field,
			
 
				-									"key":   tag.Key,
			
 
				-								})
			
 
				+						if tag.Type == "string" {
			
 
				+							if k == tag.Key {
			
 
				+								text := ju.TrimLRSpace(val, "")
			
 
				+								if text != "" {
			
 
				+									kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				+										"value":     text,
			
 
				+										"type":      "table",
			
 
				+										"field":     field,
			
 
				+										"key":       tag.Key,
			
 
				+										"matchtype": "string",
			
 
				+									})
			
 
				+								}
			
 
				+								break
			
 
				+							}
			
 
				+						} else if tag.Type == "regexp" {
			
 
				+							if tag.Reg.MatchString(k) {
			
 
				+								text := ju.TrimLRSpace(val, "")
			
 
				+								if text != "" {
			
 
				+									kvmap[field] = append(kvmap[field], map[string]interface{}{
			
 
				+										"value":     text,
			
 
				+										"type":      "table",
			
 
				+										"field":     field,
			
 
				+										"key":       tag.Key,
			
 
				+										"matchtype": "regexp",
			
 
				+									})
			
 
				+								}
			
 
				+								break
			
 
				 							}
			
 
				-							break
			
 
				 						}
			
 
				 					}
			
 
				 				}
			
@@ -315,7 +400,7 @@ func getKvByLuaFields(j *ju.Job, in *RegLuaInfo, t map[string][]*Tag) map[string
 
				 }
			
 
				 
			
 
				 //正则提取结果
			
 
				-func extRegCoreToResult(text string, j *ju.Job, v *RegLuaInfo) map[string]interface{} {
			
 
				+func extRegCoreToResult(extfrom, text string, j *ju.Job, v *RegLuaInfo) map[string]interface{} {
			
 
				 	extinfo := map[string]interface{}{}
			
 
				 	if v.RegCore.Bextract { //正则是两部分的，可以直接抽取的（含下划线）
			
 
				 		apos := v.RegCore.Reg.FindAllStringSubmatchIndex(text, -1)
			
@@ -323,13 +408,16 @@ func extRegCoreToResult(text string, j *ju.Job, v *RegLuaInfo) map[string]interf
 
				 			pos := apos[0]
			
 
				 			for k, p := range v.RegCore.ExtractPos {
			
 
				 				if len(pos) > p {
			
 
				+					if pos[p] == -1 || pos[p+1] == -1 {
			
 
				+						continue
			
 
				+					}
			
 
				 					val := text[pos[p]:pos[p+1]]
			
 
				 					extinfo[k] = val
			
 
				 					if val != "" {
			
 
				 						if j.Result[v.Field] == nil {
			
 
				 							j.Result[k] = [](*ju.ExtField){}
			
 
				 						}
			
 
				-						j.Result[k] = append(j.Result[k], &ju.ExtField{k, v.RuleText, "regexp", val})
			
 
				+						j.Result[k] = append(j.Result[k], &ju.ExtField{k, v.Code, "regexp", "regcontent", extfrom, val})
			
 
				 					}
			
 
				 				}
			
 
				 			}
			
@@ -341,7 +429,7 @@ func extRegCoreToResult(text string, j *ju.Job, v *RegLuaInfo) map[string]interf
 
				 			if j.Result[v.Field] == nil {
			
 
				 				j.Result[v.Field] = [](*ju.ExtField){}
			
 
				 			}
			
 
				-			j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{v.Field, v.RuleText, "regexp", val})
			
 
				+			j.Result[v.Field] = append(j.Result[v.Field], &ju.ExtField{v.Field, v.Code, "regexp", "regcontent", extfrom, val})
			
 
				 		}
			
 
				 	}
			
 
				 	return extinfo
			
@@ -360,11 +448,13 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 
				 			if tmps, ok := v.([]map[string]interface{}); ok {
			
 
				 				j.Result[k] = [](*ju.ExtField){}
			
 
				 				for _, tmp := range tmps {
			
 
				-					j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["key"]), qu.ObjToString(tmp["type"]), tmp["value"]})
			
 
				+					j.Result[k] = append(j.Result[k], &ju.ExtField{k, qu.ObjToString(tmp["key"]), qu.ObjToString(tmp["type"]), qu.ObjToString(tmp["matchtype"]), qu.ObjToString(tmp["extfrom"]), tmp["value"]})
			
 
				 				}
			
 
				 			}
			
 
				 		}
			
 
				-		AddExtLog(j.SourceMid, result, extinfo, in, t) //抽取日志
			
 
				+		if len(extinfo) > 0 {
			
 
				+			AddExtLog(j.SourceMid, result, extinfo, in, t) //抽取日志
			
 
				+		}
			
 
				 	} else {
			
 
				 		extinfo := map[string]interface{}{}
			
 
				 		if in.Field != "" && j.Result[in.Field] != nil {
			
@@ -379,7 +469,9 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 
				 				exts = append(exts, text)
			
 
				 			}
			
 
				 			extinfo[in.Field] = exts
			
 
				-			AddExtLog(j.SourceMid, tmp, extinfo, in, t) //抽取日志
			
 
				+			if len(extinfo) > 0 {
			
 
				+				AddExtLog(j.SourceMid, tmp, extinfo, in, t) //抽取日志
			
 
				+			}
			
 
				 		} else {
			
 
				 			for key, tmp := range j.Result {
			
 
				 				exts := []interface{}{}
			
@@ -393,7 +485,9 @@ func ExtRegBack(j *ju.Job, in *RegLuaInfo, t *TaskInfo) {
 
				 				}
			
 
				 				extinfo[key] = exts
			
 
				 			}
			
 
				-			AddExtLog(j.SourceMid, j.Result, extinfo, in, t) //抽取日志
			
 
				+			if len(extinfo) > 0 {
			
 
				+				AddExtLog(j.SourceMid, j.Result, extinfo, in, t) //抽取日志
			
 
				+			}
			
 
				 		}
			
 
				 	}
			
 
				 }
			
@@ -407,10 +501,12 @@ func getResultMapForLua(j *ju.Job) map[string][]map[string]interface{} {
 
				 		}
			
 
				 		for _, v := range val {
			
 
				 			tmp := map[string]interface{}{
			
 
				-				"field": v.Field,
			
 
				-				"value": v.Value,
			
 
				-				"type":  v.Type,
			
 
				-				"key":   v.Key,
			
 
				+				"field":     v.Field,
			
 
				+				"value":     v.Value,
			
 
				+				"type":      v.Type,
			
 
				+				"matchtype": v.MatchType,
			
 
				+				"key":       v.Key,
			
 
				+				"extfrom":   v.ExtFrom,
			
 
				 			}
			
 
				 			result[key] = append(result[key], tmp)
			
 
				 		}
			
@@ -466,3 +562,14 @@ func SaveExtLog() {
 
				 	}
			
 
				 	time.AfterFunc(10*time.Second, SaveExtLog)
			
 
				 }
			
 
				+
			
 
				+// AnalysisSaveResult copies one extracted value per field into doc and then
			
 
				+// writes doc to the totable collection, selecting the record by doc's "_id".
			
 
				+//
			
 
				+// Work in progress: for now only the first candidate of each field is kept.
			
 
				+// The earlier loop assigned every candidate in turn, so the last one won,
			
 
				+// which contradicted that stated intent.
			
 
				+func AnalysisSaveResult(doc *map[string]interface{}, result map[string][]*ju.ExtField, totable string) {
			
 
				+	if doc == nil || *doc == nil {
			
 
				+		return // nothing to fill in or save; writing into a nil map would panic
			
 
				+	}
			
 
				+	for key, val := range result {
			
 
				+		if len(val) == 0 || val[0] == nil {
			
 
				+			continue
			
 
				+		}
			
 
				+		(*doc)[key] = val[0].Value
			
 
				+	}
			
 
				+	// NOTE(review): the trailing true/false are presumably upsert/multi flags — confirm against db.Mgo.Update.
			
 
				+	db.Mgo.Update(totable, `{"_id":"`+qu.BsonIdToSId((*doc)["_id"])+`"}`, doc, true, false)
			
 
				+}
			
--- a/src/jy/extract/extractInit.go
+++ b/src/jy/extract/extractInit.go
@@ -24,7 +24,7 @@ type ExtReg struct {
 
				 }
			
 
				 type RuleCore struct {
			
 
				 	LuaLogic  string        //进入逻辑
			
 
				-	IsBlock   bool          //是否分块
			
 
				+	ExtFrom   string        //从哪个字段抽取
			
 
				 	RulePres  []*RegLuaInfo //前置规则
			
 
				 	RuleBacks []*RegLuaInfo //后置规则
			
 
				 	RuleCores []*RegLuaInfo //抽取规则
			
@@ -38,18 +38,20 @@ type TaskInfo struct {
 
				 	ProcessPool                  chan bool //任务进程池
			
 
				 }
			
 
				 type Tag struct {
			
 
				-	Type string //标签类型 string 字符串、regexp 正则
			
 
				-	Key  string //
			
 
				+	Type string         // tag type: "string" (literal key match) or "regexp" (pattern match)
			
 
				+	Key  string         // tag text; for regexp tags this is the pattern source
			
 
				+	Reg  *regexp.Regexp // compiled form of Key for regexp tags (presumably nil for string tags — confirm)
			
 
				 }
			
 
				 type ExtractTask struct {
			
 
				-	Id        string            //任务id
			
 
				-	IsRun     bool              //是否启动
			
 
				-	Content   string            //信息内容
			
 
				-	TaskInfo  *TaskInfo         //任务信息
			
 
				-	RulePres  []*RegLuaInfo     //前置规则
			
 
				-	RuleBacks []*RegLuaInfo     //后置规则
			
 
				-	RuleCores []*RuleCore       //抽取规则
			
 
				-	Tag       map[string][]*Tag //标签库
			
 
				+	Id        string              // task id
			
 
				+	IsRun     bool                // whether the task has been started
			
 
				+	Content   string              // message content
			
 
				+	TaskInfo  *TaskInfo           // task information
			
 
				+	RulePres  []*RegLuaInfo       // pre-processing rules
			
 
				+	RuleBacks []*RegLuaInfo       // post-processing rules
			
 
				+	RuleCores []*RuleCore         // extraction rules
			
 
				+	Tag       map[string][]*Tag   // tag library, keyed by field name
			
 
				+	ClearFn   map[string][]string // cleanup function names, keyed by field name
			
 
				 }
			
 
				 
			
 
				 func init() {
			
@@ -138,7 +140,7 @@ func (e *ExtractTask) InitRuleCore() {
 
				 		}
			
 
				 		rcore := &RuleCore{}
			
 
				 		rcore.LuaLogic = qu.ObjToString(vv["s_luascript"]) //是否进入逻辑脚本
			
 
				-		rcore.IsBlock, _ = vv["isblock"].(bool)            //是否分块
			
 
				+		rcore.ExtFrom = qu.If(vv["extfrom"].(bool), "title", "detail").(string)
			
 
				 		//前置规则
			
 
				 		rulePres := []*RegLuaInfo{}
			
 
				 		plist, _ := db.Mgo.Find("rule_logicpre", `{"sid":"`+qu.BsonIdToSId(vv["_id"])+`","s_version":"`+e.TaskInfo.Version+`"}`, nil, nil, false, -1, -1)
			
@@ -203,10 +205,13 @@ func (e *ExtractTask) InitRuleCore() {
 
				 			}
			
 
				 			if rinfo.IsLua {
			
 
				 				rinfo.RuleText = v["s_luascript"].(string)
			
 
				-				rinfo.LFields, _ = v["s_fields"].([]interface{})
			
 
				+				//暂时提取全部属性
			
 
				+				rinfo.LFields = getALLFields()
			
 
				+				rinfo.IsHasFields = true
			
 
				+				/*rinfo.LFields, _ = v["s_fields"].([]interface{})
			
 
				 				if len(rinfo.LFields) > 0 {
			
 
				 					rinfo.IsHasFields = true
			
 
				-				}
			
 
				+				}*/
			
 
				 			} else {
			
 
				 				rinfo.RuleText = v["s_rule"].(string)
			
 
				 				rinfo.Field = v["s_field"].(string)
			
@@ -252,5 +257,31 @@ func (e *ExtractTask) InitTag() {
 
				 		}
			
 
				 	}
			
 
				 	//正则标签库
			
 
				+	list, _ = db.Mgo.Find("tagdetailinfo", `{"s_type":"正则","s_version":"`+e.TaskInfo.Version+`"}`, nil, nil, false, -1, -1)
			
 
				+	for _, v := range *list {
			
 
				+		field := qu.ObjToString(v["s_field"])
			
 
				+		if tmp, ok := v["content"].([]interface{}); ok {
			
 
				+			for _, key := range tmp {
			
 
				+				tag := &Tag{Type: "regexp", Key: key.(string), Reg: regexp.MustCompile(key.(string))}
			
 
				+				e.Tag[field] = append(e.Tag[field], tag)
			
 
				+			}
			
 
				+		}
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// getALLFields returns the "s_field" value of every record in the "fields"
			
 
				+// collection, in the order the query yields them. The result is never nil;
			
 
				+// it is empty when the query produces no result set.
			
 
				+func getALLFields() []interface{} {
			
 
				+	list, _ := db.Mgo.Find("fields", `{}`, nil, `{"s_field":1}`, false, -1, -1)
			
 
				+	if list == nil {
			
 
				+		// The query status is discarded above, so guard the pointer itself:
			
 
				+		// dereferencing a nil result would panic while rules are being loaded.
			
 
				+		return []interface{}{}
			
 
				+	}
			
 
				+	fields := make([]interface{}, 0, len(*list))
			
 
				+	for _, v := range *list {
			
 
				+		fields = append(fields, v["s_field"])
			
 
				+	}
			
 
				+	return fields
			
 
				+}
			
 
				 
			
 
				+// InitClearFn installs the task's cleanup-function table: each field name
			
 
				+// maps to the names of the clear functions configured for it. Only "budget"
			
 
				+// is configured at present.
			
 
				+func (e *ExtractTask) InitClearFn() {
			
 
				+	e.ClearFn = map[string][]string{
			
 
				+		"budget": {"tomoney", "clearMaxAmount"},
			
 
				+	}
			
 
				 }
			
--- a/src/jy/util/article.go
+++ b/src/jy/util/article.go
@@ -17,10 +17,12 @@ type Job struct {
 
				 }
			
 
				 
			
 
				 type ExtField struct {
			
 
				-	Field string      //属性
			
 
				-	Key   string      //匹配标签或正则
			
 
				-	Type  string      //抽取类型
			
 
				-	Value interface{} //抽取结果
			
 
				+	Field     string      // field (attribute) name
			
 
				+	Key       string      // matched tag, or the code of the regexp rule
			
 
				+	Type      string      // kv (subtypes: colon1, colon2, space, table) or regexp
			
 
				+	MatchType string      // match type: 1) tag-library type (string, regexp); 2) full-text regexp: regcontent
			
 
				+	ExtFrom   string      // extraction source (title, detail)
			
 
				+	Value     interface{} // extracted value
			
 
				 }
			
 
				 
			
 
				 //块
			
--- a/src/web/templates/admin/rule_logiclist.html
+++ b/src/web/templates/admin/rule_logiclist.html
@@ -104,6 +104,7 @@ $(function () {
 
				 				tag=[{label:"名称",s_label:"s_name",placeholder:"",must:true},
			
 
				 					{label:"描述",s_label:"s_descript",type:"tpl_text"},
			
 
				 					{label:"启用",s_label:"isuse",type:"tpl_list_local",list:[{"s_name":"是","_id":true},{"s_name":"否","_id":false}],default:true},
			
 
				+					{label:"标题抽取",s_label:"extfrom",type:"tpl_list_local",list:[{"s_name":"是","_id":true},{"s_name":"否","_id":false}],default:false},
			
 
				 					{label:"是否适用",s_label:"s_luascript",type:"tpl_text",must:true},
			
 
				 					{s_label:"_id",type:"tpl_hidden"},
			
 
				 					{s_label:"s_version",type:"tpl_hidden"}]
		`@@ -0,0 +1 @@`
		`+Subproject commit 902a5ce7a7812e2ba9f73b9d96c09d5136df39cd`