Skip to content

Commit

Permalink
Fortified Unit Title values
Browse files Browse the repository at this point in the history
  • Loading branch information
pgram1 committed Mar 7, 2020
1 parent 239c7b3 commit 91ca365
Showing 1 changed file with 15 additions and 15 deletions.
30 changes: 15 additions & 15 deletions Document.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,15 @@ private void getDocInfo() throws InterruptedException, IOException {
if (content.length() == 1)
return;

this.unitTitle = between(content, "UNIT TITLE", "CREDITS");
// collapse every run of two or more whitespace characters into a single '█',
// a character highly unlikely to appear in the content, so it can serve as a
// field delimiter
content = content.trim().replaceAll("\\s{2,}", "█");
System.out.println(content);

// use the '█' delimiter to capture the whole unit title without needing to
// know which field follows it. Limitation: the title is cut short if it
// itself contains a run of two or more whitespace characters, because that
// run is also turned into a delimiter
this.unitTitle = between(content, "UNIT TITLE█", "█");
this.unitCodeHelper = new UnitCodeHelper(findUnitCode(content, "UNIT CODE"));

// we use the information derived from the unit code to achieve data uniformity
Expand All @@ -79,21 +87,13 @@ private String readFile(String path, Charset encoding) throws IOException {
return new String(encoded, encoding);
}

private String between(String value, String a, String b) {
// Return a substring between the two strings.
int posA = value.indexOf(a);
if (posA == -1) {
return "";
private String between(String original, String a, String b) {
Pattern pattern = Pattern.compile(a + "(.*?)" + b, Pattern.DOTALL);
Matcher matcher = pattern.matcher(original);
while (matcher.find()) {
return matcher.group(1);
}
int posB = value.lastIndexOf(b);
if (posB == -1) {
return "";
}
int adjustedPosA = posA + a.length();
if (adjustedPosA >= posB) {
return "";
}
return value.substring(adjustedPosA, posB).trim().replaceAll("\\s{2,}", " ");
return null;
}

private String findSemester(String str, String word) {
Expand Down

0 comments on commit 91ca365

Please sign in to comment.