From d6461db848bf44367c181dc030db5148b32d6dce Mon Sep 17 00:00:00 2001 From: TudbuT Date: Fri, 24 Jun 2022 18:04:49 +0200 Subject: [PATCH] add more split-chars to tokenizer, add () support (tested) --- ISBPL.java | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/ISBPL.java b/ISBPL.java index b3f51dc..f12c4e1 100644 --- a/ISBPL.java +++ b/ISBPL.java @@ -1719,15 +1719,25 @@ public class ISBPL { word.append('"'); isInString = true; } - else if(c == ' ') { - words.add(word.toString()); + else if(c == ' ' || c == '°' || c == 'ß' || c == '§') { + String w = word.toString(); + while(w.startsWith("(") && w.length() > 1) + w = w.substring(1); + while(w.endsWith(")") && w.length() > 1) + w = w.substring(0, w.length() - 1); + words.add(w); word = new StringBuilder(); } else { word.append(c); } } - words.add(word.toString()); + String w = word.toString(); + while(w.startsWith("(") && w.length() > 1) + w = w.substring(1); + while(w.endsWith(")") && w.length() > 1) + w = w.substring(0, w.length() - 1); + words.add(w); ArrayList cleanWords = new ArrayList<>(); for(int i = 0; i < words.size(); i++) {