Skip to content
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
<parent>
<artifactId>verapdf-parent</artifactId>
<groupId>org.verapdf</groupId>
<version>1.26.2</version>
<version>1.28.1</version>
</parent>

<groupId>org.verapdf</groupId>
<artifactId>wcag-algorithms</artifactId>
<version>1.26.0</version>
<version>1.28.0</version>

<name>veraPDF wcag algorithms</name>
<description>veraPDF algorithms for semantic checks.</description>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ public void add(TextLine line) {
textLines.add(line);
super.add(line);
}

/**
 * Appends every line of the given list to this block, in list order.
 * Each element is handed to {@link #add(TextLine)}, so the per-line
 * bookkeeping done there applies to every appended line.
 *
 * @param lines the lines to append; traversed once from first to last
 */
public void add(List<TextLine> lines) {
    lines.forEach(textLine -> add(textLine));
}

public int getLinesNumber() {
return textLines.size();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public class TextChunk extends TextInfoChunk {
private double fontWeight;
private double italicAngle;
private double[] fontColor;
private double contrastRatio;
private double contrastRatio = Integer.MAX_VALUE;
private boolean hasSpecialStyle = false;
private boolean hasSpecialBackground = false;
private double[] backgroundColor;
Expand Down Expand Up @@ -169,30 +169,21 @@ public Double getSymbolWidth(int index) {
}

public void adjustSymbolEndsToBoundingBox(List<Double> symbolEnds) {
if (this.symbolEnds == null) {
double textStart = getTextStart();
double textEnd = getTextEnd();
if (symbolEnds == null) {
this.symbolEnds = new ArrayList<>(value.length() + 1);
double symbolEnd = getTextStart();
double symbolEnd = textStart;
this.symbolEnds.add(symbolEnd);
double averageWidth = getAverageSymbolWidth();
if (isRightLeftHorizontalText() || isUpBottomVerticalText()) {
for (int i = 0; i < value.length(); i++) {
symbolEnd -= averageWidth;
this.symbolEnds.add(symbolEnd);
}
} else {
for (int i = 0; i < value.length(); i++) {
symbolEnd += averageWidth;
this.symbolEnds.add(symbolEnd);
}
double averageWidth = (textEnd - textStart) / value.length();
for (int i = 0; i < value.length(); i++) {
symbolEnd += averageWidth;
this.symbolEnds.add(symbolEnd);
}
return;
}
double textStart = getTextStart();
if (isRightLeftHorizontalText() || isUpBottomVerticalText()) {
this.symbolEnds = symbolEnds.stream().map(e -> e - textStart).collect(Collectors.toList());
} else {
this.symbolEnds = symbolEnds.stream().map(e -> e + textStart).collect(Collectors.toList());
}
double multiplier = (textEnd - textStart) / (symbolEnds.get(symbolEnds.size() - 1) - symbolEnds.get(0));
this.symbolEnds = symbolEnds.stream().map(e -> textStart + e * multiplier).collect(Collectors.toList());
}

public double getAverageSymbolWidth() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public class ListInterval {
private List<ListItemInfo> listItemsInfos = new ArrayList<>();
public Integer numberOfColumns;

public ListInterval() {
public ListInterval() {

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ else if (differentLinesProbability >= toColumnsMergeProbability) {
mergeProbability = differentLinesProbability;
TextColumn lastColumn = new TextColumn(currentTextNode.getLastColumn());
TextBlock lastBlock = new TextBlock(lastColumn.getLastTextBlock());
lastBlock.getLines().addAll(lines);
lastBlock.add(lines);
lastColumn.setLastTextBlock(lastBlock);
currentTextNode.setLastColumn(lastColumn);
if (nextTextNode.getColumnsNumber() > 1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public class ClusterTableConsumer extends WCAGConsumer {
private final List<Table> tables;
private final List<PDFList> lists;

public ClusterTableConsumer() {
public ClusterTableConsumer() {
tables = new ArrayList<>();
lists = new ArrayList<>();
init();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ public BufferedImage getRenderPage(int pageNumber) {

public void calculateContrastRatio(TextChunk textChunk) {
BufferedImage renderedPage = getRenderPage(textChunk.getPageNumber());
calculateContrastRation(textChunk, renderedPage);
if (renderedPage != null) {
calculateContrastRation(textChunk, renderedPage);
}
}

private void calculateContrastRatio(SemanticTextNode node) {
Expand All @@ -109,7 +111,6 @@ private void calculateContrastRatio(SemanticTextNode node) {

public void calculateContrastRation(TextChunk textChunk, BufferedImage renderedPage) {
if ((textChunk.getValue() != null && (TextChunkUtils.isWhiteSpaceChunk(textChunk)))) {
textChunk.setContrastRatio(Integer.MAX_VALUE);
return;
}

Expand All @@ -127,15 +128,13 @@ public void calculateContrastRation(TextChunk textChunk, BufferedImage renderedP
if (isOverlappingBox) {
scaledBBox = scaledBBox.cross(pageBBox);
} else if (!pageBBox.contains(scaledBBox)) {
textChunk.setContrastRatio(Integer.MAX_VALUE);
return;
}
int x = (int) (Math.round(scaledBBox.getLeftX()));
int y = (int) (Math.round(scaledBBox.getTopY()));
int width = getIntegerBBoxValueForProcessing(scaledBBox.getWidth(), 1);
int height = getIntegerBBoxValueForProcessing(scaledBBox.getHeight(), 1);
if (width <= 1 || height <= 1) {
textChunk.setContrastRatio(Integer.MAX_VALUE);
return;
}
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
import org.verapdf.wcag.algorithms.semanticalgorithms.containers.StaticContainers;
import org.verapdf.wcag.algorithms.semanticalgorithms.utils.WCAGProgressStatus;

import java.util.List;
import java.util.LinkedList;
import java.util.Set;
import java.util.HashSet;
import java.util.*;

public class LinesPreprocessingConsumer extends WCAGConsumer {

Expand All @@ -32,7 +29,7 @@ public void findTableBorders() {
}

private List<TableBorderBuilder> findTableBorders(Integer pageNumber) {
List<TableBorderBuilder> tableBorders = new LinkedList<>();
List<TableBorderBuilder> tableBorders = new ArrayList<>();
Set<LineChunk> set = new HashSet<>(StaticContainers.getLinesCollection().getHorizontalLines(pageNumber));
set.addAll(StaticContainers.getLinesCollection().getVerticalLines(pageNumber));
set.addAll(StaticContainers.getLinesCollection().getSquares(pageNumber));
Expand Down Expand Up @@ -81,14 +78,6 @@ private List<TableBorderBuilder> findTableBorders(Integer pageNumber) {
i++;
}
}
for (TableBorderBuilder border : tableBorders) {
for (LineChunk lineChunk : border.getVerticalLines()) {
StaticContainers.getLinesCollection().getVerticalLines(pageNumber).remove(lineChunk);
}
for (LineChunk lineChunk : border.getHorizontalLines()) {
StaticContainers.getLinesCollection().getHorizontalLines(pageNumber).remove(lineChunk);
}
}
for (int i = 0; i < tableBorders.size();) {
TableBorderBuilder border = tableBorders.get(i);
if ((border.getHorizontalLinesNumber() <= 2 && border.getVerticalLinesNumber() <= 1) ||
Expand All @@ -98,21 +87,27 @@ private List<TableBorderBuilder> findTableBorders(Integer pageNumber) {
i++;
}
}
for (TableBorderBuilder border : tableBorders) {
StaticContainers.getLinesCollection().getVerticalLines(pageNumber).removeAll(border.getVerticalLines());
StaticContainers.getLinesCollection().getHorizontalLines(pageNumber).removeAll(border.getHorizontalLines());
}
return tableBorders;
}

private void mergeTableBorders(List<TableBorderBuilder> tableBorders) {
for (int i = tableBorders.size() - 2; i >= 0; i--) {
TableBorderBuilder border = tableBorders.get(i);
for (int j = i + 1; j < tableBorders.size();) {
List<Integer> indexes = new LinkedList<>();
for (int j = tableBorders.size() - 1; j > i; j--) {
TableBorderBuilder border2 = tableBorders.get(j);
if (border.isConnectedBorder(border2)) {
border.mergeBorder(border2);
tableBorders.remove(j);
} else {
j++;
indexes.add(j);
}
}
for (Integer index : indexes) {
border.mergeBorder(tableBorders.get(index));
tableBorders.remove((int)index);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,15 @@
import org.verapdf.wcag.algorithms.entities.lists.ListIntervalsCollection;
import org.verapdf.wcag.algorithms.entities.lists.info.ListItemInfo;
import org.verapdf.wcag.algorithms.entities.lists.info.ListItemTextInfo;
import org.verapdf.wcag.algorithms.semanticalgorithms.utils.listLabelsDetection.AlfaLettersListLabelsDetectionAlgorithm1;
import org.verapdf.wcag.algorithms.semanticalgorithms.utils.listLabelsDetection.AlfaLettersListLabelsDetectionAlgorithm2;
import org.verapdf.wcag.algorithms.semanticalgorithms.utils.listLabelsDetection.ArabicNumbersListLabelsDetectionAlgorithm;
import org.verapdf.wcag.algorithms.semanticalgorithms.utils.listLabelsDetection.RomanNumbersListLabelsDetectionAlgorithm;
import org.verapdf.wcag.algorithms.semanticalgorithms.utils.listLabelsDetection.*;

import java.util.*;

public class ListLabelsUtils {

private static final Set<Character> labels = new HashSet<>(
Arrays.asList('\u002D', '\u2022', '\u25CF', '\u2714', '\u2717', '\u2794', '\u27A2', '\uE00A', '\uE00C',
'\uF076', '\u2588', '\u25A0', '\u2013', '\uF0B7', '\uF0A7', '\u25A1', '\uF0A1', '\u25AA', '\u25FC')); //office labels examples (-, •, ✔, ✗, ●, ➔, ➢), pdf files labels examples (█, ■, , □, , ▪, ◼)
'\uF076', '\u2588', '\u25A0', '\u2013', '\uF0B7', '\uF0A7', '\u25A1', '\uF0A1', '\u25AA', '\u25FC', '\u25CB', '\u203B', '\u274D')); //office labels examples (-, •, ✔, ✗, ●, ➔, ➢), pdf files labels examples (█, ■, , □, , ▪, ◼, ○, ※, ❍)
private static final Character o = '\u006F';

public static boolean isListLabel(String value) {
Expand Down Expand Up @@ -70,6 +67,7 @@ public static boolean isListLabels(List<String> listLabels) {
}
return new RomanNumbersListLabelsDetectionAlgorithm().isListLabels(labels, commonStartLength, commonEndLength) ||
new ArabicNumbersListLabelsDetectionAlgorithm().isListLabels(labels, commonStartLength, commonEndLength) ||
new KoreanLettersListLabelsDetectionAlgorithm().isListLabels(labels, commonStartLength, commonEndLength) ||
new AlfaLettersListLabelsDetectionAlgorithm1().isListLabels(labels, commonStartLength, commonEndLength) ||
new AlfaLettersListLabelsDetectionAlgorithm2().isListLabels(labels, commonStartLength, commonEndLength);
}
Expand Down Expand Up @@ -142,6 +140,7 @@ public static Set<ListInterval> getListItemsIntervals(List<ListItemTextInfo> ite
ListIntervalsCollection listIntervals = new ListIntervalsCollection(getItemsWithEqualsLabels(itemsInfo));
listIntervals.putAll(new AlfaLettersListLabelsDetectionAlgorithm1().getItemsIntervals(itemsInfo));
listIntervals.putAll(new AlfaLettersListLabelsDetectionAlgorithm2().getItemsIntervals(itemsInfo));
listIntervals.putAll(new KoreanLettersListLabelsDetectionAlgorithm().getItemsIntervals(itemsInfo));
listIntervals.putAll(new RomanNumbersListLabelsDetectionAlgorithm().getItemsIntervals(itemsInfo));
listIntervals.putAll(new ArabicNumbersListLabelsDetectionAlgorithm().getItemsIntervals(itemsInfo));
return listIntervals.getSet();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,9 @@ protected String getLowerCaseRegex() {
protected String getUpperCaseRegex() {
return UPPER_CASE_ENGLISH_LETTER_REGEX;
}

/**
 * Returns this class's {@code letters} list.
 * Number/label conversions elsewhere in this hierarchy read the alphabet
 * through this accessor (e.g. {@code getLetters().indexOf(...)}) rather than
 * through a static field, so each subclass can supply its own list.
 */
@Override
protected List<Character> getLetters() {
return letters;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,22 @@ protected Integer getNumberFromString(String string) {
return getNumberFromLetters1(string);
}

private static String getLetters1FromNumber(int integer) {
private String getLetters1FromNumber(int integer) {
integer--;
int n = integer / letters.size();
char c = letters.get(integer % letters.size());
int n = integer / getLetters().size();
char c = getLetters().get(integer % getLetters().size());
StringBuilder str = new StringBuilder();
for (int i = 0; i <= n; i++) {
str.append(c);
}
return str.toString();
}

private static Integer getNumberFromLetters1(String s) {
private Integer getNumberFromLetters1(String s) {
if (s.isEmpty()) {
return null;
}
int num = letters.indexOf(s.charAt(0));
int num = getLetters().indexOf(s.charAt(0));
if (num < 0) {
return null;
}
Expand All @@ -36,6 +36,6 @@ private static Integer getNumberFromLetters1(String s) {
return null;
}
}
return letters.size() * (s.length() - 1) + num + 1;
return getLetters().size() * (s.length() - 1) + num + 1;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,28 +12,28 @@ protected Integer getNumberFromString(String string) {
return getNumberFromLetters2(string);
}

private static String getLetters2FromNumber(int integer) {
private String getLetters2FromNumber(int integer) {
StringBuilder str = new StringBuilder();
while (integer > 0) {
integer--;
int k = integer % letters.size();
str.insert(0, letters.get(k));
integer /= letters.size();
int k = integer % getLetters().size();
str.insert(0, getLetters().get(k));
integer /= getLetters().size();
}
return str.toString();
}

private static Integer getNumberFromLetters2(String s) {
private Integer getNumberFromLetters2(String s) {
if (s.isEmpty()) {
return null;
}
int result = 0;
for (char c : s.toCharArray()) {
int num = letters.indexOf(c);
int num = getLetters().indexOf(c);
if (num < 0) {
return null;
}
result = result * letters.size() + num + 1;
result = result * getLetters().size() + num + 1;
}
return result;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,8 @@ class ArabicNumberInformation {
private boolean isBadItem(ListItemTextInfo listItem) {
String item = listItem.getListItem();
String nextSubstring = item.substring(start);
int nextNumberOfStartZeros = getNumberOfStartZeros(nextSubstring);
int nextArabicNumberStartLength = getRegexStartLength(nextSubstring);
if (nextSubstring.length() == nextNumberOfStartZeros + nextArabicNumberStartLength && listItem.hasOneLine()) {
if (nextSubstring.length() == nextArabicNumberStartLength && listItem.hasOneLine()) {
return true;
}
if (item.matches(DOUBLE_REGEX) && listItem.hasOneLine()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package org.verapdf.wcag.algorithms.semanticalgorithms.utils.listLabelsDetection;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * List-label detection for enclosed ("circled") Arabic numerals.
 *
 * <p>The label alphabet is the circled digit zero (U+24EA) followed by the
 * circled numbers 1-20 (U+2460..U+2473), 21-35 (U+3251..U+325F) and
 * 36-50 (U+32B1..U+32BF), in ascending numeric order. The ordering matters:
 * the position of a symbol in {@link #letters} is what the letter-based
 * algorithms use to decide whether labels follow one another.
 */
public class CircledArabicNumbersListLabelsDetectionAlgorithm extends LettersListLabelsDetectionAlgorithm {

    /**
     * Circled numerals 0..50 in sequence order (51 symbols).
     * Wrapped as unmodifiable because the list is shared static state:
     * a plain {@code Arrays.asList} view would still allow {@code set()}.
     */
    protected static final List<Character> letters = Collections.unmodifiableList(Arrays.asList(
            '\u24EA','\u2460','\u2461','\u2462','\u2463','\u2464','\u2465','\u2466','\u2467','\u2468',
            '\u2469','\u246A','\u246B','\u246C','\u246D','\u246E','\u246F','\u2470','\u2471','\u2472',
            '\u2473','\u3251','\u3252','\u3253','\u3254','\u3255','\u3256','\u3257','\u3258','\u3259',
            '\u325A','\u325B','\u325C','\u325D','\u325E','\u325F','\u32B1','\u32B2','\u32B3','\u32B4',
            '\u32B5','\u32B6','\u32B7','\u32B8','\u32B9','\u32BA','\u32BB','\u32BC','\u32BD','\u32BE',
            '\u32BF'));

    /** Matches one or more circled numerals from the same ranges as {@link #letters}. */
    private static final String CIRCLED_ARABIC_NUMBER_REGEX = "[\u24EA\u2460-\u2473\u3251-\u325F\u32B1-\u32BF]+";

    /**
     * @return the pattern matching a run of circled numerals
     */
    @Override
    protected String getRegex() {
        return CIRCLED_ARABIC_NUMBER_REGEX;
    }

    /**
     * Circled numerals have no letter case, so the lower-case pattern is the
     * same as the general one.
     *
     * @return the pattern matching a run of circled numerals
     */
    @Override
    protected String getLowerCaseRegex() {
        return CIRCLED_ARABIC_NUMBER_REGEX;
    }

    /**
     * @return the ordered, read-only alphabet of circled numerals
     */
    @Override
    protected List<Character> getLetters() {
        return letters;
    }

    /**
     * Circled numerals have no letter case, so the upper-case pattern is the
     * same as the general one.
     *
     * @return the pattern matching a run of circled numerals
     */
    @Override
    protected String getUpperCaseRegex() {
        return CIRCLED_ARABIC_NUMBER_REGEX;
    }
}
Loading