robinst
diff --git a/‎.gitignore
Lines changed: 6 additions & 0 deletions b/‎.gitignore
Lines changed: 6 additions & 0 deletions
diff --git a/‎README.md
Lines changed: 89 additions & 2 deletions b/‎README.md
Lines changed: 89 additions & 2 deletions
diff --git a/‎pom.xml
Lines changed: 65 additions & 0 deletions b/‎pom.xml
Lines changed: 65 additions & 0 deletions
diff --git a/‎src/main/java/org/nibor/autolink/Autolink.java
Lines changed: 22 additions & 0 deletions b/‎src/main/java/org/nibor/autolink/Autolink.java
Lines changed: 22 additions & 0 deletions
diff --git a/‎src/main/java/org/nibor/autolink/Link.java
Lines changed: 24 additions & 0 deletions b/‎src/main/java/org/nibor/autolink/Link.java
Lines changed: 24 additions & 0 deletions
diff --git a/‎src/main/java/org/nibor/autolink/LinkExtractor.java
Lines changed: 136 additions & 0 deletions b/‎src/main/java/org/nibor/autolink/LinkExtractor.java
Lines changed: 136 additions & 0 deletions
diff --git a/‎src/main/java/org/nibor/autolink/LinkRenderer.java
Lines changed: 7 additions & 0 deletions b/‎src/main/java/org/nibor/autolink/LinkRenderer.java
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/main/java/org/nibor/autolink/LinkType.java
Lines changed: 5 additions & 0 deletions b/‎src/main/java/org/nibor/autolink/LinkType.java
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,6 @@
+# IDEA
+*.iml
+/.idea/
+
+# mvn
+target/
@@ -1,2 +1,89 @@
-# autolink-java
-Java library to extract links (URLs, email addresses) from plain text; fast, small and hopefully smart
+autolink-java
+=============
+
+Java library to extract links such as URLs and email addresses from plain text.
+Fast, small and tries to be smart with matching (text is hard).
+
+Inspired by [Rinku](https://github.com/vmg/rinku). Similar to it, regular
+expressions are not used. Instead, the input text is parsed in one pass with
+limited backtracking.
+
+Features
+--------
+
+### URL extraction
+
+Extracts URLs of the form `scheme://example` with any scheme. URIs such
+as `example:test` are not matched (may be added as an option in the future).
+If only certain schemes should be allowed, the result can be filtered.
+
+Includes heuristics for not including trailing delimiters such as punctuation
+and unbalanced parentheses, see examples below.
+
+Supports internationalized domain names (IDN). Note that they are not validated
+and as a result, invalid URLs may be matched.
+
+Example input and the extracted link:
+
+* `http://example.com.` → `http://example.com`
+* `http://example.com,` → `http://example.com`
+* `(http://example.com)` → `http://example.com`
+* `(... (see http://example.com))` → `http://example.com`
+* `https://en.wikipedia.org/wiki/Link_(The_Legend_of_Zelda)` →
+  `https://en.wikipedia.org/wiki/Link_(The_Legend_of_Zelda)`
+* `http://üñîçøðé.com/` → `http://üñîçøðé.com/`
+
+### Email address extraction
+
+Extracts emails such as `foo@example.com`. Doesn't support quoted local parts
+such as `"this is sparta"@example.com`. Matches international email addresses,
+but doesn't verify the domain name (may match too much).
+
+Examples:
+
+* `foo@example.com` → `foo@example.com`
+* `foo@example.com.` → `foo@example.com`
+* `foo@example.com,` → `foo@example.com`
+* `üñîçøðé@üñîçøðé.com` → `üñîçøðé@üñîçøðé.com`
+
+Usage
+-----
+
+Extract links:
+
+```java
+import org.nibor.autolink.*;
+
+String input = "wow, so example: http://test.com";
+LinkExtractor linkExtractor = LinkExtractor.builder().build();
+List<Link> links = linkExtractor.getLinks(input);
+Link link = links.get(0);
+link.getType();        // LinkType.URL
+link.getBeginIndex();  // 17
+link.getEndIndex();    // 32
+input.substring(link.getBeginIndex(), link.getEndIndex());  // "http://test.com"
+```
+
+Wrapping URLs in an `<a>` tag (doesn't handle escaping, uses Java 8):
+
+```java
+import org.nibor.autolink.*;
+
+String input = "wow http://test.com such linked";
+LinkExtractor linkExtractor = LinkExtractor.builder()
+        .linkTypes(EnumSet.of(LinkType.URL)) // limit to URLs
+        .build();
+String result = Autolink.renderLinks(input, linkExtractor, (link, sb) -> {
+    sb.append("<a href=\"");
+    sb.append(input, link.getBeginIndex(), link.getEndIndex());
+    sb.append("\">");
+    sb.append(input, link.getBeginIndex(), link.getEndIndex());
+    sb.append("</a>");
+});
+result;  // wow <a href="http://test.com">http://test.com</a> such linked
+```
+
+License
+-------
+
+MIT, see the [LICENSE](LICENSE) file.
@@ -0,0 +1,65 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>org.nibor.autolink</groupId>
+    <artifactId>autolink</artifactId>
+    <version>0.1.0-SNAPSHOT</version>
+
+    <name>autolink-java</name>
+    <description>
+        Java library to extract links (URLs, email addresses) from plain text; fast, small and hopefully smart
+    </description>
+    <url>https://github.com/robinst/autolink-java</url>
+
+    <properties>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.3</version>
+                <configuration>
+                    <source>7</source>
+                    <target>7</target>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.12</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <developers>
+        <developer>
+            <name>Robin Stocker</name>
+            <email>robin@nibor.org</email>
+            <url>https://github.com/robinst/</url>
+        </developer>
+    </developers>
+
+    <licenses>
+        <license>
+            <name>MIT License</name>
+            <url>http://www.opensource.org/licenses/mit-license.php</url>
+        </license>
+    </licenses>
+
+    <scm>
+        <connection>scm:git:git@github.com:robinst/autolink-java.git</connection>
+        <developerConnection>scm:git:git@github.com:robinst/autolink-java.git</developerConnection>
+        <url>https://github.com/robinst/autolink-java</url>
+    </scm>
+
+</project>
@@ -0,0 +1,22 @@
+package org.nibor.autolink;
+
+import java.util.List;
+
+/**
+ * Static helper that produces a copy of a text in which every extracted link has been re-rendered.
+ */
+public class Autolink {
+
+    /**
+     * Builds a new string from {@code input}: text outside of links is copied unchanged, and for each link found by
+     * {@code linkExtractor} the output contains whatever {@code linkRenderer} appends for it.
+     *
+     * @param input the text to process
+     * @param linkExtractor used to find the links in {@code input}
+     * @param linkRenderer called once per link, in the order the extractor returned them, to append the rendered link
+     * @return the rendered text
+     */
+    public static String renderLinks(CharSequence input, LinkExtractor linkExtractor, LinkRenderer linkRenderer) {
+        List<Link> found = linkExtractor.getLinks(input);
+        // A little headroom over the input length for the markup the renderer is expected to add.
+        StringBuilder out = new StringBuilder(16 + input.length());
+        int copiedUpTo = 0;
+        for (int k = 0; k < found.size(); k++) {
+            Link current = found.get(k);
+            // Plain text between the previous link (or the start of the input) and this link.
+            out.append(input, copiedUpTo, current.getBeginIndex());
+            linkRenderer.render(current, out);
+            copiedUpTo = current.getEndIndex();
+        }
+        // Plain text after the last link, if any.
+        if (input.length() > copiedUpTo) {
+            out.append(input, copiedUpTo, input.length());
+        }
+        return out.toString();
+    }
+
+}
@@ -0,0 +1,24 @@
+package org.nibor.autolink;
+
+/**
+ * Information for an extracted link: its type and its position within the input it was extracted from.
+ */
+public interface Link {
+
+    /**
+     * @return the type of link
+     */
+    LinkType getType();
+
+    /**
+     * @return begin index (inclusive) in the original input that this link starts at
+     */
+    int getBeginIndex();
+
+    /**
+     * @return end index (exclusive) in the original input that this link ends at; in other words, index of first
+     * character after link. Together with {@link #getBeginIndex()} it can be passed directly to
+     * {@code substring}/{@code subSequence} on the input to obtain the link text.
+     */
+    int getEndIndex();
+
+}
@@ -0,0 +1,136 @@
+package org.nibor.autolink;
+
+import org.nibor.autolink.internal.EmailScanner;
+import org.nibor.autolink.internal.Scanner;
+import org.nibor.autolink.internal.UrlScanner;
+
+import java.util.*;
+
+/**
+ * Extracts links from input.
+ * <p>
+ * Create and configure an extractor using {@link #builder()}. An extractor keeps its own copy of the configured link
+ * types, so changing the set that was handed to the builder afterwards does not affect an already built extractor.
+ */
+public class LinkExtractor {
+
+    // Shared scanner instances; final so the references cannot be replaced after class initialization.
+    private static final Scanner URL_SCANNER = new UrlScanner();
+    private static final Scanner EMAIL_SCANNER = new EmailScanner();
+
+    private final Set<LinkType> linkTypes;
+
+    private LinkExtractor(Set<LinkType> linkTypes) {
+        // Defensive copy: the caller's set may be mutable and may be modified (possibly from another thread) after
+        // build(). Copied element by element because EnumSet.copyOf rejects an empty non-EnumSet collection; null
+        // elements are skipped since they can never match a link type.
+        Set<LinkType> copy = EnumSet.noneOf(LinkType.class);
+        for (LinkType linkType : linkTypes) {
+            if (linkType != null) {
+                copy.add(linkType);
+            }
+        }
+        this.linkTypes = copy;
+    }
+
+    /**
+     * @return a new builder, initially configured to extract all link types
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Extract the links from the input. Can be called multiple times with different inputs, is thread-safe.
+     * <p>
+     * All working state of this method is kept in local variables. NOTE(review): thread-safety additionally relies on
+     * the shared scanner implementations being stateless, which is not visible from this class.
+     *
+     * @param input the input text, must not be {@code null}
+     * @return the links, in order that they appear in the input, never {@code null}
+     * @throws NullPointerException if {@code input} is {@code null}
+     */
+    public List<Link> getLinks(CharSequence input) {
+        Objects.requireNonNull(input, "input must not be null");
+
+        List<Link> links = new ArrayList<>();
+
+        // End of the most recently extracted link; handed to the scanner on every call (presumably so a match cannot
+        // reach back into text that already belongs to a previous link -- confirm against Scanner).
+        int rewindIndex = 0;
+        // Filled by the scanner on success: result[0] is the begin index, result[1] the end index of the link.
+        int[] result = new int[2];
+
+        int i = 0;
+        int length = input.length();
+        while (i < length) {
+            Scanner scanner = trigger(input.charAt(i));
+            if (scanner != null && scanner.scan(input, i, rewindIndex, result)) {
+                links.add(new LinkImpl(scanner.getLinkType(), result[0], result[1]));
+                // Continue right after the link that was just found.
+                rewindIndex = result[1];
+                i = result[1];
+            } else {
+                i++;
+            }
+        }
+        return links;
+    }
+
+    /**
+     * Selects the scanner to try for a character of the input.
+     *
+     * @param c the character at the current position
+     * @return the scanner triggered by {@code c} if its link type is enabled, otherwise {@code null}
+     */
+    private Scanner trigger(char c) {
+        switch (c) {
+            case ':':
+                if (linkTypes.contains(LinkType.URL)) {
+                    return URL_SCANNER;
+                }
+                break;
+            case '@':
+                if (linkTypes.contains(LinkType.EMAIL)) {
+                    return EMAIL_SCANNER;
+                }
+                break;
+            default:
+                break;
+        }
+        return null;
+    }
+
+    /**
+     * Builder for configuring link extractor.
+     */
+    public static class Builder {
+
+        private Set<LinkType> linkTypes = EnumSet.allOf(LinkType.class);
+
+        private Builder() {
+        }
+
+        /**
+         * @param linkTypes the link types that should be extracted (by default, all types are extracted)
+         * @return this builder
+         * @throws NullPointerException if {@code linkTypes} is {@code null}
+         */
+        public Builder linkTypes(Set<LinkType> linkTypes) {
+            this.linkTypes = Objects.requireNonNull(linkTypes, "linkTypes must not be null");
+            return this;
+        }
+
+        /**
+         * @return the configured link extractor; it uses a snapshot of the link types as they are at this point
+         */
+        public LinkExtractor build() {
+            return new LinkExtractor(linkTypes);
+        }
+    }
+
+    /**
+     * Immutable {@link Link} holding the type and the begin/end indexes reported by a scanner.
+     */
+    private static class LinkImpl implements Link {
+
+        private final LinkType linkType;
+        private final int beginIndex;
+        private final int endIndex;
+
+        private LinkImpl(LinkType linkType, int beginIndex, int endIndex) {
+            this.linkType = linkType;
+            this.beginIndex = beginIndex;
+            this.endIndex = endIndex;
+        }
+
+        @Override
+        public LinkType getType() {
+            return linkType;
+        }
+
+        @Override
+        public int getBeginIndex() {
+            return beginIndex;
+        }
+
+        @Override
+        public int getEndIndex() {
+            return endIndex;
+        }
+
+        @Override
+        public String toString() {
+            return "Link{type=" + getType() + ", beginIndex=" + beginIndex + ", endIndex=" + endIndex + "}";
+        }
+    }
+
+}
@@ -0,0 +1,7 @@
+package org.nibor.autolink;
+
+/**
+ * Callback that writes the output for a single link, used by {@code Autolink.renderLinks}.
+ */
+public interface LinkRenderer {
+
+    /**
+     * Appends the rendered form of a link to the output.
+     * <p>
+     * {@code Autolink.renderLinks} copies the text before, between and after links itself; whatever should appear in
+     * place of the link (including the link text, if wanted) has to be appended here.
+     *
+     * @param link the link to render; its indexes refer to the input that was passed to the extractor
+     * @param sb the output built so far, to append to
+     */
+    void render(Link link, StringBuilder sb);
+
+}
@@ -0,0 +1,5 @@
+package org.nibor.autolink;
+
+/**
+ * The kinds of links that can be extracted; used to tag each {@code Link} and to select what
+ * {@code LinkExtractor} should look for.
+ */
+public enum LinkType {
+    // URL: links such as "http://test.com"; EMAIL: email addresses such as "foo@example.com".
+    URL, EMAIL
+}
-Original file line number
+Diff line change
@@ @@ -0,0 +1,6 @@ @@
 +# IDEA
 +*.iml
 +/.idea/
++
 +# mvn
 +target/