Skip to content

Commit 2296ca4

Browse files
authored
Merge pull request #25 from robinst/add-spans-to-deprecate-renderlinks
Add extractSpans method so that we can deprecate renderLinks (#21)
2 parents 2ad2795 + c320522 commit 2296ca4

File tree

13 files changed

+318
-39
lines changed

13 files changed

+318
-39
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
66
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
77

88
## Unreleased
9+
### Added
10+
- Add `extractSpans` method that also returns the text pieces of the
11+
input before, between and after links. This makes it more convenient
12+
to write code that transforms the whole input text without having to
13+
manually keep track of indexes.
914
### Changed
15+
- Deprecated `Autolink.renderLinks` and `LinkRenderer` in favor of `extractSpans`, see "Added".
1016
- Stop URLs when encountering an `"`. This is consistent with RFC 3986,
1117
and it seems unlikely that a user would have an unescaped `"` in a URL
1218
anyway, as browsers escape it when you copy a URL that contains one.

README.md

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -52,26 +52,38 @@ Note that by default all supported types of links are extracted. If
5252
you're only interested in specific types, narrow it down using the
5353
`linkTypes` method.
5454

55-
There's also a static method to replace links found in the text. Here's
56-
an example of using that for wrapping URLs in an `<a>` tag. Note that it
57-
doesn't handle escaping at all:
55+
There's another method which is convenient for when you want to transform
56+
all of the input text to something else. Here's an example of using that
57+
to transform the text to HTML and wrap URLs in an `<a>` tag (escaping
58+
is done using owasp-java-encoder):
5859

5960
```java
6061
import org.nibor.autolink.*;
62+
import org.owasp.encoder.Encode;
6163

6264
String input = "wow http://test.com such linked";
6365
LinkExtractor linkExtractor = LinkExtractor.builder()
6466
.linkTypes(EnumSet.of(LinkType.URL)) // limit to URLs
6567
.build();
66-
Iterable<LinkSpan> links = linkExtractor.extractLinks(input);
67-
String result = Autolink.renderLinks(input, links, (link, text, sb) -> {
68-
sb.append("<a href=\"");
69-
sb.append(text, link.getBeginIndex(), link.getEndIndex());
70-
sb.append("\">");
71-
sb.append(text, link.getBeginIndex(), link.getEndIndex());
72-
sb.append("</a>");
73-
});
74-
result; // "wow <a href=\"http://test.com\">http://test.com</a> such linked"
68+
Iterable<Span> spans = linkExtractor.extractSpans(input);
69+
70+
StringBuilder sb = new StringBuilder();
71+
for (Span span : spans) {
72+
String text = input.substring(span.getBeginIndex(), span.getEndIndex());
73+
if (span instanceof LinkSpan) {
74+
// span is a URL
75+
sb.append("<a href=\"");
76+
sb.append(Encode.forHtmlAttribute(text));
77+
sb.append("\">");
78+
sb.append(Encode.forHtml(text));
79+
sb.append("</a>");
80+
} else {
81+
// span is plain text before/after link
82+
sb.append(Encode.forHtml(text));
83+
}
84+
}
85+
86+
sb.toString(); // "wow <a href=\"http://test.com\">http://test.com</a> such linked"
7587
```
7688

7789
Features

src/main/java/org/nibor/autolink/Autolink.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@ public class Autolink {
1313
* @param links the links to render, see {@link LinkExtractor} to extract them
1414
* @param linkRenderer the link rendering implementation
1515
* @return the rendered string
16+
* @deprecated use {@link LinkExtractor#extractSpans(CharSequence)} instead
1617
*/
18+
@Deprecated
1719
public static String renderLinks(CharSequence input, Iterable<LinkSpan> links, LinkRenderer linkRenderer) {
1820
if (input == null) {
1921
throw new NullPointerException("input must not be null");

src/main/java/org/nibor/autolink/LinkExtractor.java

Lines changed: 81 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,7 @@
11
package org.nibor.autolink;
22

3-
import org.nibor.autolink.internal.EmailScanner;
3+
import org.nibor.autolink.internal.*;
44
import org.nibor.autolink.internal.Scanner;
5-
import org.nibor.autolink.internal.UrlScanner;
6-
import org.nibor.autolink.internal.WwwScanner;
75

86
import java.util.*;
97

@@ -33,6 +31,7 @@ public static Builder builder() {
3331
*
3432
* @param input the input text, must not be null
3533
* @return a lazy iterable for the links in order that they appear in the input, never null
34+
* @see #extractSpans(CharSequence) extractSpans to also get spans for the plain text pieces of the input
3635
*/
3736
public Iterable<LinkSpan> extractLinks(final CharSequence input) {
3837
if (input == null) {
@@ -46,6 +45,28 @@ public Iterator<LinkSpan> iterator() {
4645
};
4746
}
4847

48+
/**
49+
* Extract spans from the input text. A span is a substring of the input and represents either a link
50+
* (see {@link LinkSpan}) or plain text outside a link.
51+
* <p>
52+
* Using this is more convenient than {@link #extractLinks} if you want to transform the whole input text to
53+
* a different format.
54+
*
55+
* @param input the input text, must not be null
56+
* @return a lazy iterable for the spans in order that they appear in the input, never null
57+
*/
58+
public Iterable<Span> extractSpans(final CharSequence input) {
59+
if (input == null) {
60+
throw new NullPointerException("input must not be null");
61+
}
62+
return new Iterable<Span>() {
63+
@Override
64+
public Iterator<Span> iterator() {
65+
return new SpanIterator(input, new LinkIterator(input));
66+
}
67+
};
68+
}
69+
4970
private Scanner trigger(char c) {
5071
switch (c) {
5172
case ':':
@@ -83,7 +104,7 @@ public Builder linkTypes(Set<LinkType> linkTypes) {
83104

84105
/**
85106
* @param emailDomainMustHaveDot true if the domain in an email address is required to have more than one part,
86-
* false if it can also just have single part (e.g. foo@com); true by default
107+
* false if it can also just have single part (e.g. foo@com); true by default
87108
* @return this builder
88109
*/
89110
public Builder emailDomainMustHaveDot(boolean emailDomainMustHaveDot) {
@@ -160,4 +181,60 @@ private void setNext() {
160181
}
161182
}
162183
}
184+
185+
private class SpanIterator implements Iterator<Span> {
186+
187+
private final CharSequence input;
188+
private final LinkIterator linkIterator;
189+
190+
private int index = 0;
191+
private LinkSpan nextLink = null;
192+
193+
public SpanIterator(CharSequence input, LinkIterator linkIterator) {
194+
this.input = input;
195+
this.linkIterator = linkIterator;
196+
}
197+
198+
@Override
199+
public boolean hasNext() {
200+
return index < input.length();
201+
}
202+
203+
private Span nextTextSpan(int endIndex) {
204+
Span span = new SpanImpl(index, endIndex);
205+
index = endIndex;
206+
return span;
207+
}
208+
209+
@Override
210+
public Span next() {
211+
if (!hasNext()) {
212+
throw new NoSuchElementException();
213+
}
214+
215+
if (nextLink == null) {
216+
if (linkIterator.hasNext()) {
217+
nextLink = linkIterator.next();
218+
} else {
219+
return nextTextSpan(input.length());
220+
}
221+
}
222+
223+
if (index < nextLink.getBeginIndex()) {
224+
// text before link, return plain
225+
return nextTextSpan(nextLink.getBeginIndex());
226+
} else {
227+
// at link, return it and make sure we continue after it next time
228+
Span span = nextLink;
229+
index = nextLink.getEndIndex();
230+
nextLink = null;
231+
return span;
232+
}
233+
}
234+
235+
@Override
236+
public void remove() {
237+
throw new UnsupportedOperationException("remove");
238+
}
239+
}
163240
}

src/main/java/org/nibor/autolink/LinkRenderer.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
package org.nibor.autolink;
22

33
/**
4-
* Renderer for a link.
4+
* Renderer for a link.
5+
*
6+
* @deprecated use {@link LinkExtractor#extractSpans(CharSequence)} instead.
57
*/
8+
@Deprecated
69
public interface LinkRenderer {
710

811
/**

src/main/java/org/nibor/autolink/LinkSpan.java

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,11 @@
33
/**
44
* Information for an extracted link.
55
*/
6-
public interface LinkSpan {
6+
public interface LinkSpan extends Span {
77

88
/**
99
* @return the type of link
1010
*/
1111
LinkType getType();
1212

13-
/**
14-
* @return begin index (inclusive) in the original input that this link starts at
15-
*/
16-
int getBeginIndex();
17-
18-
/**
19-
* @return end index (exclusive) in the original input that this link ends at; in other words, index of first
20-
* character after link
21-
*/
22-
int getEndIndex();
23-
2413
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package org.nibor.autolink;
2+
3+
/**
4+
* A reference to a piece of the input text, either a link (see {@link LinkSpan}) or plain text.
5+
*/
6+
public interface Span {
7+
8+
/**
9+
* @return begin index (inclusive) in the original input that this span starts at
10+
*/
11+
int getBeginIndex();
12+
13+
/**
14+
* @return end index (exclusive) in the original input that this span ends at; in other words, index of first
15+
* character after the span
16+
*/
17+
int getEndIndex();
18+
19+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package org.nibor.autolink.internal;
2+
3+
import org.nibor.autolink.Span;
4+
5+
public class SpanImpl implements Span {
6+
7+
private final int beginIndex;
8+
private final int endIndex;
9+
10+
public SpanImpl(int beginIndex, int endIndex) {
11+
this.beginIndex = beginIndex;
12+
this.endIndex = endIndex;
13+
}
14+
15+
@Override
16+
public int getBeginIndex() {
17+
return beginIndex;
18+
}
19+
20+
@Override
21+
public int getEndIndex() {
22+
return endIndex;
23+
}
24+
25+
@Override
26+
public String toString() {
27+
return "Span{beginIndex=" + beginIndex + ", endIndex=" + endIndex + "}";
28+
}
29+
}

src/test/java/org/nibor/autolink/AutolinkBenchmark.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ public static void main(String[] args) throws Exception {
3232

3333
@Benchmark
3434
public void generatedText() {
35-
link(GENERATED_INPUT, "|", null);
35+
renderExtractedLinks(GENERATED_INPUT, "|", null);
3636
}
3737

3838
@Override

src/test/java/org/nibor/autolink/AutolinkTestCase.java

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,25 @@
55
public abstract class AutolinkTestCase {
66

77
protected void assertLinked(String input, String expected, LinkType expectedLinkType) {
8-
String result = link(input, "|", expectedLinkType);
8+
String result = renderExtractedLinks(input, "|", expectedLinkType);
99
assertEquals(expected, result);
10+
11+
result = renderExtractedSpans(input, "|", expectedLinkType);
12+
assertEquals(expected, result);
13+
1014
}
1115

1216
protected void assertNotLinked(String input) {
13-
String result = link(input, "|", null);
17+
String result = renderExtractedLinks(input, "|", null);
18+
assertEquals(input, result);
19+
20+
result = renderExtractedSpans(input, "|", null);
1421
assertEquals(input, result);
1522
}
1623

1724
protected abstract LinkExtractor getLinkExtractor();
1825

19-
protected String link(String input, final String marker, final LinkType expectedLinkType) {
26+
protected String renderExtractedLinks(String input, final String marker, final LinkType expectedLinkType) {
2027
Iterable<LinkSpan> links = getLinkExtractor().extractLinks(input);
2128
return Autolink.renderLinks(input, links, new LinkRenderer() {
2229
@Override
@@ -31,4 +38,23 @@ public void render(LinkSpan link, CharSequence text, StringBuilder sb) {
3138
});
3239
}
3340

41+
protected String renderExtractedSpans(String input, final String marker, final LinkType expectedLinkType) {
42+
Iterable<Span> spans = getLinkExtractor().extractSpans(input);
43+
StringBuilder sb = new StringBuilder();
44+
for (Span span : spans) {
45+
if (span instanceof LinkSpan) {
46+
LinkType type = ((LinkSpan) span).getType();
47+
if (expectedLinkType != null) {
48+
assertEquals(expectedLinkType, type);
49+
}
50+
sb.append(marker);
51+
sb.append(input, span.getBeginIndex(), span.getEndIndex());
52+
sb.append(marker);
53+
} else {
54+
sb.append(input, span.getBeginIndex(), span.getEndIndex());
55+
}
56+
}
57+
return sb.toString();
58+
}
59+
3460
}

0 commit comments

Comments
 (0)