Skip to content

Commit e88d655

Browse files
committed
Return lazy Iterable from LinkExtractor instead of List
This seems more fitting for the API, making it a "pull parser". It allows iterating through all the links without having to hold the whole collection in memory.
1 parent 236163e commit e88d655

File tree

7 files changed

+134
-40
lines changed

7 files changed

+134
-40
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ import org.nibor.autolink.*;
5858

5959
String input = "wow, so example: http://test.com";
6060
LinkExtractor linkExtractor = LinkExtractor.builder().build();
61-
List<Link> links = linkExtractor.getLinks(input);
62-
Link link = links.get(0);
61+
Iterable<Link> links = linkExtractor.extractLinks(input);
62+
Link link = links.iterator().next();
6363
link.getType(); // LinkType.URL
6464
link.getBeginIndex(); // 17
6565
link.getEndIndex(); // 32
@@ -75,7 +75,8 @@ String input = "wow http://test.com such linked";
7575
LinkExtractor linkExtractor = LinkExtractor.builder()
7676
.linkTypes(EnumSet.of(LinkType.URL)) // limit to URLs
7777
.build();
78-
String result = Autolink.renderLinks(input, linkExtractor, (link, sb) -> {
78+
Iterable<Link> links = linkExtractor.extractLinks(input);
79+
String result = Autolink.renderLinks(input, links, (link, sb) -> {
7980
sb.append("<a href=\"");
8081
sb.append(input, link.getBeginIndex(), link.getEndIndex());
8182
sb.append("\">");

src/main/java/org/nibor/autolink/Autolink.java

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,20 @@
11
package org.nibor.autolink;
22

3-
import java.util.List;
4-
3+
/**
4+
* Utility class for processing text with links.
5+
*/
56
public class Autolink {
67

7-
public static String renderLinks(CharSequence input, LinkExtractor linkExtractor, LinkRenderer linkRenderer) {
8-
List<Link> links = linkExtractor.getLinks(input);
8+
/**
9+
* Render the supplied links from the supplied input text using a renderer. The parts of the text outside of links
10+
* are added to the result without processing.
11+
*
12+
* @param input the input text
13+
* @param links the links to render, see {@link LinkExtractor} to extract them
14+
* @param linkRenderer the link rendering function
15+
* @return the rendered string
16+
*/
17+
public static String renderLinks(CharSequence input, Iterable<Link> links, LinkRenderer linkRenderer) {
918
StringBuilder sb = new StringBuilder(input.length() + 16);
1019
int lastIndex = 0;
1120
for (Link link : links) {

src/main/java/org/nibor/autolink/LinkExtractor.java

Lines changed: 65 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88

99
/**
1010
* Extracts links from input.
11-
* <p>
12-
* Create and configure an extractor using {@link #builder()}.
11+
* <p/>
12+
* Create and configure an extractor using {@link #builder()}, then call {@link #extractLinks}.
1313
*/
1414
public class LinkExtractor {
1515

@@ -27,35 +27,18 @@ public static Builder builder() {
2727
}
2828

2929
/**
30-
* Extract the links from the input. Can be called multiple times with different inputs, is thread-safe.
30+
* Extract the links from the input text. Can be called multiple times with different inputs (thread-safe).
3131
*
3232
* @param input the input text, must not be {@code null}
33-
* @return the links, in order that they appear in the input, never {@code null}
33+
* @return a lazy iterable for the links in order that they appear in the input, never {@code null}
3434
*/
35-
public List<Link> getLinks(CharSequence input) {
36-
List<Link> links = new ArrayList<>();
37-
38-
int rewindIndex = 0;
39-
int[] result = new int[2];
40-
41-
int i = 0;
42-
int length = input.length();
43-
while (i < length) {
44-
Scanner scanner = trigger(input.charAt(i));
45-
if (scanner != null) {
46-
boolean found = scanner.scan(input, i, rewindIndex, result);
47-
if (found) {
48-
links.add(new LinkImpl(scanner.getLinkType(), result[0], result[1]));
49-
rewindIndex = result[1];
50-
i = result[1];
51-
} else {
52-
i++;
53-
}
54-
} else {
55-
i++;
35+
public Iterable<Link> extractLinks(final CharSequence input) {
36+
return new Iterable<Link>() {
37+
@Override
38+
public Iterator<Link> iterator() {
39+
return new LinkIterator(input);
5640
}
57-
}
58-
return links;
41+
};
5942
}
6043

6144
private Scanner trigger(char c) {
@@ -88,7 +71,7 @@ private Builder() {
8871
* @return this builder
8972
*/
9073
public Builder linkTypes(Set<LinkType> linkTypes) {
91-
this.linkTypes = Objects.requireNonNull(linkTypes, "linkTypes must not be null");
74+
this.linkTypes = new HashSet<>(Objects.requireNonNull(linkTypes, "linkTypes must not be null"));
9275
return this;
9376
}
9477

@@ -133,4 +116,58 @@ public String toString() {
133116
}
134117
}
135118

119+
private class LinkIterator implements Iterator<Link> {
120+
121+
private final CharSequence input;
122+
123+
private Link next = null;
124+
private int index = 0;
125+
private int rewindIndex = 0;
126+
private int[] result = new int[2];
127+
128+
public LinkIterator(CharSequence input) {
129+
this.input = input;
130+
}
131+
132+
@Override
133+
public boolean hasNext() {
134+
setNext();
135+
return next != null;
136+
}
137+
138+
@Override
139+
public Link next() {
140+
if (hasNext()) {
141+
Link link = next;
142+
next = null;
143+
return link;
144+
} else {
145+
throw new NoSuchElementException();
146+
}
147+
}
148+
149+
private void setNext() {
150+
if (next != null) {
151+
return;
152+
}
153+
154+
int length = input.length();
155+
while (index < length) {
156+
Scanner scanner = trigger(input.charAt(index));
157+
if (scanner != null) {
158+
boolean found = scanner.scan(input, index, rewindIndex, result);
159+
if (found) {
160+
next = new LinkImpl(scanner.getLinkType(), result[0], result[1]);
161+
rewindIndex = result[1];
162+
index = result[1];
163+
break;
164+
} else {
165+
index++;
166+
}
167+
} else {
168+
index++;
169+
}
170+
}
171+
}
172+
}
136173
}

src/test/java/org/nibor/autolink/AutolinkBenchmark.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@ public class AutolinkBenchmark extends AutolinkTestCase {
1919

2020
public static void main(String[] args) throws Exception {
2121
System.out.println("input length: " + GENERATED_INPUT.length());
22-
System.out.println("number of links: " + LinkExtractor.builder().build().getLinks(GENERATED_INPUT).size());
22+
Iterable<Link> links = LinkExtractor.builder().build().extractLinks(GENERATED_INPUT);
23+
int count = 0;
24+
for (Link ignore : links) {
25+
count++;
26+
}
27+
System.out.println("number of links: " + count);
2328
System.out.println();
2429
Main.main(args);
2530
}

src/test/java/org/nibor/autolink/AutolinkTestCase.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ protected void assertNotLinked(String input) {
1717
protected abstract LinkExtractor getLinkExtractor();
1818

1919
protected String link(final String input, final String marker, final LinkType expectedLinkType) {
20-
return Autolink.renderLinks(input, getLinkExtractor(), new LinkRenderer() {
20+
Iterable<Link> links = getLinkExtractor().extractLinks(input);
21+
return Autolink.renderLinks(input, links, new LinkRenderer() {
2122
@Override
2223
public void render(Link link, StringBuilder sb) {
2324
if (expectedLinkType != null) {

src/test/java/org/nibor/autolink/AutolinkUrlTest.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
import java.util.Arrays;
1010
import java.util.EnumSet;
11-
import java.util.List;
1211

1312
import static org.junit.Assert.assertEquals;
1413

@@ -127,8 +126,8 @@ public void international() {
127126

128127
@Test
129128
public void linkToString() {
130-
List<Link> links = getLinkExtractor().getLinks("wow, so example: http://test.com");
131-
assertEquals("Link{type=URL, beginIndex=17, endIndex=32}", links.get(0).toString());
129+
Iterable<Link> links = getLinkExtractor().extractLinks("wow, so example: http://test.com");
130+
assertEquals("Link{type=URL, beginIndex=17, endIndex=32}", links.iterator().next().toString());
132131
}
133132

134133
@Override
src/test/java/org/nibor/autolink/LinkExtractorIterableTest.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package org.nibor.autolink;
2+
3+
import org.junit.Test;
4+
5+
import java.util.Iterator;
6+
import java.util.NoSuchElementException;
7+
8+
import static org.junit.Assert.*;
9+
10+
public class LinkExtractorIterableTest {
11+
12+
@Test
13+
public void iteratorIsNew() {
14+
Iterable<Link> iterable = getSingleElementIterable();
15+
assertEquals(LinkType.URL, iterable.iterator().next().getType());
16+
assertEquals(LinkType.URL, iterable.iterator().next().getType());
17+
}
18+
19+
@Test
20+
public void hasNextOnlyAdvancesOnce() {
21+
Iterable<Link> iterable = getSingleElementIterable();
22+
Iterator<Link> iterator = iterable.iterator();
23+
assertTrue(iterator.hasNext());
24+
assertTrue(iterator.hasNext());
25+
assertNotNull(iterator.next());
26+
assertFalse(iterator.hasNext());
27+
assertFalse(iterator.hasNext());
28+
}
29+
30+
@Test(expected = NoSuchElementException.class)
31+
public void nextThrowsNoSuchElementException() {
32+
Iterable<Link> iterable = getSingleElementIterable();
33+
Iterator<Link> iterator = iterable.iterator();
34+
assertNotNull(iterator.next());
35+
iterator.next();
36+
}
37+
38+
private Iterable<Link> getSingleElementIterable() {
39+
String input = "foo http://example.com";
40+
return LinkExtractor.builder().build().extractLinks(input);
41+
}
42+
}

0 commit comments

Comments
 (0)