소스 검색

下标越界
table抽取的结果丢失

fengweiqiang 6 년 전
부모
커밋
e7c18fd062
3개의 변경된 파일, 12개의 추가 그리고 12개의 삭제
  1. 3 3
      src/jy/extract/extract.go
  2. 6 6
      src/jy/pretreated/analytable.go
  3. 3 3
      src/jy/pretreated/tablev2.go

+ 3 - 3
src/jy/extract/extract.go

@@ -727,7 +727,7 @@ func ExtRegCore(extfrom string, doc map[string]interface{}, j *ju.Job, in *RegLu
 				}
 			} else {
 				for _, v := range j.Block {
-					extinfo := extRegCoreToResult(extfrom, v.Text, &v.Tag, j, in)
+					extinfo := extRegCoreToResult(extfrom, v.Text, &v.Classify, j, in)
 					if len(extinfo) > 0 {
 						AddExtLog("extract", j.SourceMid, nil, extinfo, in, et.TaskInfo) //抽取日志
 					}
@@ -947,7 +947,7 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]bool, j *ju.Job, v
 						if v.RegCore.NumSign == -1 { //正负值修正
 							val = "-" + val
 						}
-						exfield := ju.ExtField{Field: k, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, Value: val}
+						exfield := ju.ExtField{BlockTag:*tag,Field: k, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, Value: val}
 						if extfrom == "title" {
 							exfield.Score = 4
 						}
@@ -999,7 +999,7 @@ func extRegCoreToResult(extfrom, text string, tag *map[string]bool, j *ju.Job, v
 			if j.Result[v.Field] == nil {
 				j.Result[v.Field] = [](*ju.ExtField){}
 			}
-			field := &ju.ExtField{Field: v.Field, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, Value: val}
+			field := &ju.ExtField{BlockTag:*tag,Field: v.Field, Code: v.Code, RuleText: v.RuleText, Type: "regexp", MatchType: "regcontent", ExtFrom: extfrom, Value: val}
 			if extfrom == "title" {
 				field.Score = 4
 			}

+ 6 - 6
src/jy/pretreated/analytable.go

@@ -750,7 +750,7 @@ func (table *Table) createTabe(trs *goquery.Selection) {
 		//遍历每行的td
 		tds := sel.ChildrenFiltered("td,th")
 		TR := NewTR(table)
-		tdTextIsNull := true
+		tdTextIsNull := false
 		tds.Each(func(m int, selm *goquery.Selection) {
 			//对隐藏列不处理!!!
 			if IsHide(selm) {
@@ -760,12 +760,12 @@ func (table *Table) createTabe(trs *goquery.Selection) {
 			td := NewTD(selm, TR, table) //初始化td,kv处理,td中有table处理,td的方向
 			//num++
 			TR.AddTD(td)
-			if td.Val != "" && td.SonTableResult != nil{ //删除一个tr,tr中所有td是空值的
-				tdTextIsNull = false
+			if td.Val == "" && td.SonTableResult == nil { //删除一个tr,tr中所有td是空值的
+				tdTextIsNull = true
 			}
 		})
 		//向table添加每行不为空的tr
-		if tdTextIsNull {
+		if !tdTextIsNull {
 			table.AddTR(TR)
 		}
 	})
@@ -1763,7 +1763,7 @@ func (table *Table) FindTdVal(td *TD, direct, vdirect int) (b bool) {
 						bfind = true
 					}
 				}
-				if bvalfind && varrpos >-1{
+				if bvalfind && varrpos > -1 && len(vals) > varrpos {
 					vals[varrpos] = td.Val // += "__" + td.Val
 				} else {
 					//添加时候去除空值和nil
@@ -3147,7 +3147,7 @@ func initLineMapLineMapArr(table *Table) (lineMapArr map[string]*SortMap, lineMa
 	for _, key := range table.SortKV.Keys { //遍历table.SortKV.Keys而不是直接遍历table.SortKV.Map是为了得到table头的顺序
 		val := table.SortKV.Map[key]
 		key = regReplAllSpace.ReplaceAllString(key, "")
-		key = strings.Replace(key, "", "", -1)    //处理一个特殊的采购量 经上层处理空格后未处理掉
+		key = strings.Replace(key, "", "", -1) //处理一个特殊的采购量 经上层处理空格后未处理掉
 		if realTypeVal, ok := val.([]string); ok { //val为数组 {"数量":["1","2","3"]}
 			/*
 				{

+ 3 - 3
src/jy/pretreated/tablev2.go

@@ -210,10 +210,10 @@ func (td *TD) tdHasTable(bsontable *bool, tr *TR, table *Table) {
 			sonts := AnalyTableV2(tabs, ts.Toptype, stag, td.Html, 2, ts.Id, table.TableResult.RuleBlock) //又一次调用解析表格入口
 			td.BH = false
 			for k,v := range sonts.SortKV.Map{
-				if td.SonTableResult == nil{
-					td.SonTableResult = NewTableResult(sonts.Id,sonts.Toptype,sonts.BlockTag,sonts.Html,sonts.Itype,sonts.RuleBlock)
+				if td.TR.Table.TableResult == nil{
+					td.TR.Table.TableResult = NewTableResult(sonts.Id,sonts.Toptype,sonts.BlockTag,sonts.Html,sonts.Itype,sonts.RuleBlock)
 				}
-				td.SonTableResult.SortKV.AddKey(k,v)
+				td.TR.Table.TableResult.SortKV.AddKey(k,v)
 			}
 			//td.SonTableResult = sonts
 			//for _, k := range sonts.SortKV.Keys {