package extract

import (
	"regexp"
	"strings"
)

// Patterns shared by the text-cleaning helpers in this file. They are
// compiled during package initialisation; regexp.MustCompile panics if a
// pattern is invalid.
var (
	// clean1 matches a one-line auction result: a company name (5-30
	// characters in U+4E00..U+9FA5 followed by "公司"), a run of whitespace,
	// '-', digits and ASCII/full-width colons, the text "提交报价", an optional
	// "￥", a run of digits/whitespace, "竞价成交" and a newline.
	// Group 2 is the company name, group 3 the quoted amount.
	// Sample input: "中国电信集团有限公司驻马店分公司 2025-3-08 17：10：30 提交报价￥266000 竞价成交".
	clean1 = regexp.MustCompile("(([\u4E00-\u9FA5]{5,30}公司)[\\s-0-9:：]+提交报价[￥]?([\\s0-9]+)竞价成交\n)")

	// clean2 matches two digit runs that are separated and surrounded by
	// whitespace. Group 2 is the first run, group 3 the second.
	clean2 = regexp.MustCompile("([\\s ]+([0-9]+)[\\s ]+([0-9]+)[\\s ]+)")

	// blTextReg lists the phrases (track-record headings, key-personnel
	// material, bid-opening records, bid-rejection statements) that make
	// ExcludeYeJi consider truncating the text.
	blTextReg = regexp.MustCompile("(打分表|负责人|单位|个人|投标人|项目|企业)业绩|业绩奖项|主要人员相关资料|唱标记录|否决投标的?情况说明")

	// unblTextReg lists phrases that, when present, stop ExcludeYeJi from
	// truncating even though blTextReg matched.
	unblTextReg = regexp.MustCompile("(项目业绩案例|类似项目业绩)")

	// beforeTextReg matches a tendering agency / tendering unit / tenderer
	// label, an ASCII or full-width colon, 4-25 non-newline characters and
	// the terminating newline.
	beforeTextReg = regexp.MustCompile("(招标代理机构|招标单位|招标人)[:：].{4,25}\n")
)

// CleanText runs three clean-up passes over detail and returns the result.
//
//  1. ExcludeYeJi removes the track-record section, if any.
//  2. Each clean1 match (an auction-result line) is rewritten into a
//     "中标单位：<company>" line followed by a "中标金额：<amount>" line
//     (winning bidder / winning amount). The newline that ended the matched
//     line is consumed and not re-emitted.
//  3. Each clean2 match (two whitespace-separated digit runs) is replaced by
//     the two runs concatenated, dropping the surrounding whitespace as well.
func CleanText(detail string) string {
	// Strip the track-record part first so later passes only see kept text.
	trimmed := ExcludeYeJi(detail)

	// Convert the special auction-result layout into labelled lines.
	labelled := clean1.ReplaceAllString(trimmed, "\n中标单位：${2}\n中标金额：${3}")

	// Glue digit groups that were split by whitespace.
	return clean2.ReplaceAllString(labelled, "${2}${3}")
}

// ExcludeYeJi drops the track-record ("业绩") portion of detail.
//
// The text is returned unchanged unless all of the following hold:
//   - blTextReg matches somewhere in detail,
//   - unblTextReg does not match, and
//   - the first "业绩" starts at a byte offset greater than 1.
//
// When they hold, everything from the first "业绩" onward is discarded. Every
// beforeTextReg match found in the original text is then placed in front of
// the kept prefix (joined with "\n"), so tenderer lines located in the
// discarded part are not lost; matches that already sit in the kept prefix
// end up appearing twice.
//
// Note that the cut is made at the first "业绩" in the text, which is not
// necessarily the position where blTextReg matched.
func ExcludeYeJi(detail string) string {
	if !blTextReg.MatchString(detail) || unblTextReg.MatchString(detail) {
		return detail
	}

	cut := strings.Index(detail, "业绩")
	if cut <= 1 {
		// Not found, or so close to the start that nothing useful would remain.
		return detail
	}

	// Collect the tenderer lines from the full text before truncating it.
	owners := beforeTextReg.FindAllString(detail, -1)
	kept := detail[:cut]
	if len(owners) == 0 {
		return kept
	}
	return strings.Join(owners, "\n") + kept
}