package summarization;

import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:summarization/TextParser.class */
/**
 * Static helpers that split raw text into paragraphs and sentences and that
 * turn loosely marked-up text into a list of headings and text blocks.
 *
 * <p>The markup understood by this class consists of the tags
 * {@code <Hn>...</Hn>} (heading of level n), {@code <T>...</T>} (text block),
 * {@code <F>...</F>}, {@code <S>} and {@code <NS>}. Lower-case variants of
 * the opening forms are normalised to upper case by
 * {@link #tagText(String, String)}.
 */
public class TextParser {
    private static final String NEWLINE = "\n";
    private static final String CARRIAGE_RETURN = "\r";

    /** Two or more consecutive line-break characters separate paragraphs. */
    private static final Pattern PARAGRAPH_BREAK = Pattern.compile("[\\n\\r]{2,}");
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
    /** Run of sentence punctuation, optional closing quote, optional closing F tag. */
    private static final Pattern SENTENCE_TERMINATOR = Pattern.compile("[\\.\\?\\!]+\"?(</F>)?");
    /** Roughly the next two whitespace-delimited tokens following a terminator. */
    private static final Pattern LOOKAHEAD = Pattern.compile("\\s?\\S*\\s\\S*\\s");
    /** Whitespace, optional quote, then a capital letter (optionally after 'x ) or a digit. */
    private static final Pattern SENTENCE_START = Pattern.compile("\\s+\"?((('[a-z]\\s)?[A-Z])|([0-9]))");
    private static final Pattern HEADING_OPEN = Pattern.compile("<H\\d+>");
    private static final Pattern HEADING_CLOSE = Pattern.compile("</H\\d+>");

    private static final String F_TAGS = "</?F>";
    private static final String F_AND_S_TAGS = "<((/?F)|(N?S))>";
    private static final String S_TAGS = "<N?S>";
    private static final String T_TAGS = "</?T>";

    private static final String ILLEGAL_STRUCTURE_MESSAGE = "The structure of input text is of illegal form. Please make sure the text contains\n\n1) a title as the first parapraph in the text\n2) a level 1 heading (optional)\n3) a piece of text (optionally tagged as such).\n\nExample:\n==================================================\nThis is the title\n\nThis is a heading of level 1\n\nText to be summarized. Textblocks should contain multiple sentences or should\notherwise be tagged as text. Each textblock corresponds to the heading above it\n(should not be a title)\n\nThis is another heading of level 1\n\n<T>This is a textblock consisting of a single sentence, and should therefor be\ntagged.</T>\n==================================================\n";

    /**
     * Splits text into paragraphs.
     *
     * @param str the text; it is trimmed before splitting
     * @return the pieces of {@code str} separated by two or more consecutive
     *         newline / carriage-return characters
     */
    public static String[] getParagraphs(String str) {
        return PARAGRAPH_BREAK.split(str.trim());
    }

    /**
     * Splits a piece of text into sentences.
     *
     * <p>The text is trimmed and every whitespace run is collapsed to a single
     * space. A terminator other than a lone period (for example {@code ?},
     * {@code !}, {@code ...} or a period followed by a quote) always ends a
     * sentence. A lone period ends a sentence when
     * <ul>
     *   <li>only tags and whitespace follow it, or</li>
     *   <li>an {@code <S>} tag follows it, or</li>
     *   <li>no {@code <NS>} tag follows it, the word before it is not fully
     *       matched by {@code str2}, and the following text looks like a
     *       sentence start (capital letter or digit after whitespace).</li>
     * </ul>
     * {@code <S>} and {@code <NS>} tags are removed from the returned
     * sentences. {@code <F>} tags are either removed or re-applied so that
     * they enclose the whole sentence; an {@code <F>} span that stays open
     * across several sentences is applied to each of them.
     *
     * @param str  the text to split; an empty or whitespace-only text yields
     *             an empty vector
     * @param str2 regular expression tested against the lower-cased word that
     *             precedes a period; a full match prevents that period from
     *             ending the sentence (presumably a list of abbreviations)
     * @return a {@code Vector} of {@code String} sentences in text order
     */
    public static Vector getSentences(String str, String str2) {
        Vector sentences = new Vector();
        String remaining = WHITESPACE_RUN.matcher(str.trim()).replaceAll(" ");
        if (remaining.length() == 0) {
            // Nothing to split. (The original code threw a
            // StringIndexOutOfBoundsException for inputs shorter than two
            // characters because of an unused look-behind on the last two chars.)
            return sentences;
        }
        // The caller-supplied expression does not change while we scan.
        Pattern precedingWordException = Pattern.compile(str2);
        boolean insideOpenFSpan = false;
        int searchFrom = 0;

        while (remaining.length() > 0) {
            Matcher terminatorMatcher = SENTENCE_TERMINATOR.matcher(remaining);
            int terminatorStart;
            int terminatorEnd;
            if (terminatorMatcher.find(searchFrom)) {
                terminatorStart = terminatorMatcher.start();
                terminatorEnd = terminatorMatcher.end();
            } else {
                // No terminator left: the rest of the text is the last sentence.
                terminatorStart = remaining.length();
                terminatorEnd = remaining.length();
            }

            // Last word in front of the terminator, ignoring F tags.
            String before = remaining.substring(0, terminatorStart).replaceAll(F_TAGS, "").trim();
            String precedingWord = before.substring(before.lastIndexOf(" ") + 1);

            // A short window of text behind the terminator.
            String after = remaining.substring(terminatorEnd);
            Matcher lookaheadMatcher = LOOKAHEAD.matcher(after);
            String lookahead = lookaheadMatcher.find() ? lookaheadMatcher.group() : after;
            String lookaheadWithoutTags = lookahead.replaceAll(F_AND_S_TAGS, "");

            String afterTrimmed = after.trim();
            boolean breakForced = afterTrimmed.startsWith("<S>");
            boolean breakSuppressed = afterTrimmed.startsWith("<NS>");

            Matcher wordMatcher = precedingWordException.matcher(precedingWord.toLowerCase());
            boolean wordIsException = wordMatcher.find()
                    && wordMatcher.start() == 0
                    && wordMatcher.end() == precedingWord.length();

            boolean nextLooksLikeSentence =
                    !wordIsException && SENTENCE_START.matcher(lookaheadWithoutTags).lookingAt();
            if (breakForced) {
                nextLooksLikeSentence = true;
            } else if (breakSuppressed) {
                nextLooksLikeSentence = false;
            }

            boolean lonePeriod = remaining.substring(terminatorStart, terminatorEnd).equals(".");
            boolean nothingFollows = lookaheadWithoutTags.trim().length() == 0;
            if (lonePeriod && !nextLooksLikeSentence && !nothingFollows) {
                // This period is not a sentence end; look for the next terminator.
                searchFrom = terminatorEnd;
                continue;
            }

            String sentence = remaining.substring(0, terminatorEnd).replaceAll(S_TAGS, "").trim();
            boolean containsClosingF = sentence.indexOf("</F>") != -1;
            boolean endsWithClosingF = sentence.endsWith("</F>");
            if (insideOpenFSpan) {
                if (!containsClosingF) {
                    // Still inside a span opened by an earlier sentence.
                    sentence = wrapInF(sentence);
                } else {
                    insideOpenFSpan = false;
                    sentence = endsWithClosingF ? wrapInF(sentence) : stripF(sentence);
                }
            } else if (!sentence.startsWith("<F>")) {
                sentence = stripF(sentence);
            } else if (!containsClosingF) {
                // The span opens here and closes in a later sentence.
                insideOpenFSpan = true;
                sentence = wrapInF(sentence);
            } else if (endsWithClosingF) {
                sentence = wrapInF(sentence);
            }
            // NOTE(review): a sentence that starts with <F> and closes the span
            // before its end keeps its tags untouched, unlike the corresponding
            // case above where they are stripped - confirm this is intended.

            sentences.add(sentence);
            remaining = remaining.substring(terminatorEnd);
            searchFrom = 0;
        }
        return sentences;
    }

    /** Removes every F tag from the sentence and encloses the result in one F pair. */
    private static String wrapInF(String sentence) {
        return "<F>" + stripF(sentence) + "</F>";
    }

    /** Removes every opening and closing F tag from the sentence. */
    private static String stripF(String sentence) {
        return sentence.replaceAll(F_TAGS, "");
    }

    /**
     * Tags the text with {@link #tagText(String, String)} and parses the
     * result into text blocks and headings.
     *
     * @param str  the raw input text
     * @param str2 regular expression forwarded to sentence splitting and to
     *             every created {@code Block}
     * @return a two-element array: index 0 holds the {@code Block} objects,
     *         index 1 the {@code Heading} objects
     * @throws IllegalInputTextException if no block or no heading was found
     */
    public static Vector[] parse(String str, String str2) throws IllegalInputTextException {
        Vector[] blocksAndHeadings = parse(tagText(str, str2), 0, 0, str2, null);
        if (blocksAndHeadings[0].size() == 0 || blocksAndHeadings[1].size() == 0) {
            throw new IllegalInputTextException(ILLEGAL_STRUCTURE_MESSAGE);
        }
        return blocksAndHeadings;
    }

    /**
     * Recursively collects the headings of level {@code i} found in
     * {@code str}, the text that follows each of them, and - by recursion -
     * everything below headings of level {@code i + 1}.
     *
     * @param str     tagged text whose headings of level {@code i} are processed
     * @param i       heading level handled by this call
     * @param i2      offset added to each block's start index within
     *                {@code str} before it is passed to the {@code Block}
     *                constructor
     * @param str2    passed through unchanged to every {@code Block}
     * @param heading passed to every {@code Heading} created here
     *                (presumably the parent heading; {@code null} at the top)
     * @return a two-element array: blocks at index 0, headings at index 1
     */
    private static Vector[] parse(String str, int i, int i2, String str2, Heading heading) {
        Vector blocks = new Vector();
        Vector headings = new Vector();
        String openTag = "<H" + i + ">";
        String closeTag = "</H" + i + ">";
        String childOpenTag = "<H" + (i + 1) + ">";
        String childCloseTag = "</H" + (i + 1) + ">";
        int openTagLength = openTag.length();

        int nextOpen = str.indexOf(openTag);
        int nextClose = str.indexOf(closeTag);
        boolean lastHeading = false;
        while (!lastHeading) {
            int open = nextOpen;
            int close = nextClose;
            // The closing tag is one character longer than the opening tag.
            int bodyStart = close + openTagLength + 1;
            nextOpen = str.indexOf(openTag, bodyStart);
            nextClose = str.indexOf(closeTag, bodyStart);
            lastHeading = nextOpen == -1 || nextClose == -1;

            // The body of this heading runs up to the next heading of the same level.
            int bodyEnd = lastHeading ? str.length() : nextOpen;
            String body = str.substring(bodyStart, bodyEnd);
            int childOpen = body.indexOf(childOpenTag);
            String nested = "";
            if (childOpen != -1 || body.indexOf(childCloseTag) != -1) {
                // NOTE(review): when only a closing child tag is present,
                // childOpen is -1 and the substring calls below fail with a
                // StringIndexOutOfBoundsException - confirm whether such input
                // can reach this point.
                nested = str.substring(bodyStart + childOpen, bodyEnd);
                bodyEnd = bodyStart + childOpen;
            }

            String title = str.substring(open + openTagLength, close);
            String text = str.substring(bodyStart, bodyEnd);
            Heading current = new Heading(title, i, heading);
            headings.add(current);
            if (text.trim().length() > 0) {
                blocks.add(new Block(text.replaceAll(T_TAGS, ""), i2 + bodyStart, current, str2));
            }
            if (nested.trim().length() > 0) {
                Vector[] below = parse(nested, i + 1, i2 + bodyEnd, str2, current);
                blocks.addAll(below[0]);
                headings.addAll(below[1]);
            }
        }
        return new Vector[]{blocks, headings};
    }

    /**
     * Normalises the markup of a text and adds missing heading / text tags.
     *
     * <p>Lower-case tag openings are converted to upper case, carriage returns
     * are removed and the text is split into paragraphs at runs of two or
     * more newlines. The first paragraph is enclosed in {@code <H0>...</H0>}
     * unless those tags are already present. Each further non-blank
     * paragraph is handled as follows:
     * <ul>
     *   <li>exactly one sentence and no {@code <T>} tag at either end: it is
     *       made a heading; the level is taken from an existing heading tag
     *       and defaults to 1;</li>
     *   <li>exactly one sentence with a {@code <T>} tag at one end: the
     *       missing counterpart is added;</li>
     *   <li>otherwise: unless both an opening and a closing heading tag occur
     *       in it, missing {@code <T>} / {@code </T>} tags are added.</li>
     * </ul>
     * Paragraphs that are blank after trimming are skipped.
     *
     * @param str  the raw input text
     * @param str2 regular expression forwarded to
     *             {@link #getSentences(String, String)}
     * @return the tagged text, each paragraph followed by two newlines
     */
    public static String tagText(String str, String str2) {
        String normalised = str.replaceAll("<h", "<H").replaceAll("</h", "</H")
                .replaceAll("<t>", "<T>").replaceAll("</t>", "</T>")
                .replaceAll("<f>", "<F>").replaceAll("</f>", "</F>")
                .replaceAll("<s>", "<S>").replaceAll("<ns>", "<NS>")
                .replaceAll(CARRIAGE_RETURN, "");
        String[] paragraphs = normalised.split(NEWLINE + "{2,}");

        String title = paragraphs[0].trim();
        boolean titleHasClosingTag = title.indexOf("</H0>") != -1;
        if (!title.startsWith("<H0>")) {
            title = "<H0>" + title;
        }
        if (!titleHasClosingTag) {
            title = title + "</H0>";
        }

        StringBuffer tagged = new StringBuffer();
        tagged.append(title).append(NEWLINE).append(NEWLINE);
        for (int index = 1; index < paragraphs.length; index++) {
            String paragraph = paragraphs[index].trim();
            if (paragraph.length() == 0) {
                // A paragraph made of whitespace only carries no content.
                continue;
            }
            Matcher headingOpen = HEADING_OPEN.matcher(paragraph);
            Matcher headingClose = HEADING_CLOSE.matcher(paragraph);
            if (getSentences(paragraph, str2).size() == 1) {
                boolean hasHeadingOpen = headingOpen.find();
                boolean hasHeadingClose = headingClose.find();
                if (!paragraph.startsWith("<T>") && !paragraph.endsWith("</T>")) {
                    // Single untagged sentence: treat it as a heading.
                    String level = "1";
                    if (hasHeadingOpen) {
                        // Digits between "<H" and ">".
                        level = paragraph.substring(headingOpen.start() + 2, headingOpen.end() - 1);
                    } else if (hasHeadingClose) {
                        // Digits between "</H" and ">".
                        level = paragraph.substring(headingClose.start() + 3, headingClose.end() - 1);
                    }
                    if (!hasHeadingOpen) {
                        paragraph = "<H" + level + ">" + paragraph;
                    }
                    if (!hasHeadingClose) {
                        paragraph = paragraph + "</H" + level + ">";
                    }
                } else {
                    // One of the two T tags is present; add whichever is missing.
                    // (The original tested startsWith("</T>") here, which always
                    // appended a second closing tag to a fully tagged paragraph.)
                    if (!paragraph.startsWith("<T>")) {
                        paragraph = "<T>" + paragraph;
                    }
                    if (!paragraph.endsWith("</T>")) {
                        paragraph = paragraph + "</T>";
                    }
                }
            } else if (!headingOpen.find() || !headingClose.find()) {
                if (!paragraph.startsWith("<T>")) {
                    paragraph = "<T>" + paragraph;
                }
                if (!paragraph.endsWith("</T>")) {
                    paragraph = paragraph + "</T>";
                }
            }
            tagged.append(paragraph).append(NEWLINE).append(NEWLINE);
        }
        return tagged.toString();
    }
}
