From 708140f4a852aab226093c62c30ef30d788ef58d Mon Sep 17 00:00:00 2001
From: denkiwakame <denkivvakame@gmail.com>
Date: Tue, 28 May 2024 13:10:21 +0900
Subject: [PATCH 1/3] move arXiv/OpenReview parsing out of popup.js into parsers.js

---
 src/js/parsers.js | 107 ++++++++++++++++++++++++++++++++++++++++++++++
 src/js/popup.js   |  94 +++-------------------------------------
 2 files changed, 113 insertions(+), 88 deletions(-)
 create mode 100644 src/js/parsers.js

diff --git a/src/js/parsers.js b/src/js/parsers.js
new file mode 100644
index 0000000..c4fe269
--- /dev/null
+++ b/src/js/parsers.js
@@ -0,0 +1,107 @@
+// MIT License
+// Copyright (c) 2024 denkiwakame <denkivvakame@gmail.com>
+
+class URLParser { // dispatches a URL to the first handler registered for its prefix
+  constructor() {
+    this.parsers = []; // { domain, handler } pairs, kept in registration order
+  }
+  // Registers `handler`, called as handler(url), for URLs starting with `domain`.
+  addParser(domain, handler) {
+    this.parsers.push({ domain, handler });
+  }
+  // Resolves to the first matching handler's result; rejects if none matches.
+  async parse(url) {
+    for (const { domain, handler } of this.parsers) {
+      if (url?.startsWith(domain)) return handler(url);
+    }
+    throw new Error('No parser found for the given URL');
+  }
+}
+
+const openReviewParser = async (url) => {
+  const id = new URLSearchParams(new URL(url).search).get('id');
+  console.error(id);
+
+  const res = await fetch(url);
+  const html = await res.text();
+  const parser = new DOMParser();
+  const xml = parser.parseFromString(html, 'text/html');
+  console.error(xml);
+
+  const authorsArray = Array.from(
+    xml.querySelectorAll('meta[name="citation_author"]'),
+    (author) => author.getAttribute('content')
+  );
+  const authors = authorsArray.length ? authorsArray : ['Anonymous']; // no citation_author tags -> single 'Anonymous' author
+  // Title, abstract and date are read from the page's citation_* meta tags.
+  const paperTitle = xml
+    .querySelector('meta[name="citation_title"]')
+    .getAttribute('content');
+
+  const abst = xml
+    .querySelector('meta[name="citation_abstract"]')
+    .getAttribute('content');
+
+  const date = xml
+    .querySelector('meta[name="citation_online_date"]') // NOTE(review): the popup.js code this replaces read citation_publication_date — confirm the rename is intended
+    .getAttribute('content');
+  // keep only the YYYY-MM-DD part of the ISO 8601 (UTC) timestamp
+  const published = new Date(date).toISOString().split('T')[0];
+  const comment = 'none'; // nothing is scraped for the comment field; fixed placeholder
+
+  return {
+    id: id,
+    title: paperTitle,
+    abst: abst,
+    authors: authors,
+    url: url,
+    published: published,
+    comment: comment,
+    publisher: 'OpenReview',
+  };
+};
+
+const arXivParser = async (url) => {
+  const ARXIV_API = 'http://export.arxiv.org/api/query/search_query';
+  // ref: https://info.arxiv.org/help/arxiv_identifier.html
+  // e.g. (new id format: 2404.16782) | (old id format: hep-th/0702063)
+  const parseArXivId = (str) => str.match(/(\d+\.\d+$)|((\w|-)+\/\d+$)/)?.[0];
+
+  const paperId = parseArXivId(url);
+  const res = await fetch(ARXIV_API + '?id_list=' + paperId.toString());
+  if (res.status != 200) {
+    console.error('arXiv API request failed');
+    return;
+  }
+  const data = await res.text(); // TODO: error handling
+  console.log(res.status);
+  const xmlData = new window.DOMParser().parseFromString(data, 'text/xml');
+  console.log(xmlData);
+
+  const entry = xmlData.querySelector('entry');
+  const id = parseArXivId(entry.querySelector('id')?.textContent);
+  const paperTitle = entry.querySelector('title').textContent;
+  const abst = entry.querySelector('summary').textContent;
+  const authors = Array.from(entry.querySelectorAll('author')).map((author) => {
+    return author.textContent.trim();
+  });
+  const published = entry.querySelector('published').textContent;
+  const comment = entry.querySelector('comment')?.textContent ?? 'none';
+
+  return {
+    id: id,
+    title: paperTitle,
+    abst: abst,
+    authors: authors,
+    url: url,
+    published: published,
+    comment: comment,
+    publisher: 'arXiv',
+  };
+};
+
+const urlParser = new URLParser();
+urlParser.addParser('https://openreview.net/', openReviewParser);
+urlParser.addParser('https://arxiv.org', arXivParser);
+
+export default urlParser;
diff --git a/src/js/popup.js b/src/js/popup.js
index a975497..47e1b0e 100644
--- a/src/js/popup.js
+++ b/src/js/popup.js
@@ -7,11 +7,12 @@ import Icons from 'uikit/dist/js/uikit-icons';
 import Mustache from 'mustache';
 import NotionClient from './notion.js';
 import thenChrome from 'then-chrome';
+import urlParser from './parsers.js';
 
 UIKit.use(Icons);
 
 const TEST_URL = 'https://arxiv.org/abs/2308.04079';
-const ARXIV_API = 'http://export.arxiv.org/api/query/search_query';
+
 class UI {
   constructor() {
     this.setupProgressBar();
@@ -97,13 +98,11 @@ class UI {
     return url && url.split('.').pop() === 'pdf';
   }
   async getPaperInfo(url) {
-    if (this.isArxivUrl(url)) return this.getArXivInfo(url);
-    if (this.isOpenReviewUrl(url)) return this.getOpenReviewInfo(url);
+    this.showProgressBar();
+    const data = await urlParser.parse(url); // undefined when a parser bails out (e.g. arXiv non-200)
+    if (data) this.setFormContents(data.title, data.abst, data.comment, data.authors);
+    return data;
   }
-  // ref: https://info.arxiv.org/help/arxiv_identifier.html
-  // e.g. (new id format: 2404.16782) | (old id format: hep-th/0702063)
-  parseArXivId = (str) => str.match(/(\d+\.\d+$)|((\w|-)+\/\d+$)/)?.[0];
-
   setFormContents(paperTitle, abst, comment, authors) {
     document.getElementById('js-title').value = paperTitle;
     document.getElementById('js-abst').value = abst;
@@ -118,87 +117,6 @@ class UI {
     });
   }
 
-  async getArXivInfo(url) {
-    this.showProgressBar();
-    const paperId = this.parseArXivId(url);
-
-    const res = await fetch(ARXIV_API + '?id_list=' + paperId.toString());
-    if (res.status != 200) {
-      console.error('arXiv API request failed');
-      return;
-    }
-    const data = await res.text(); // TODO: error handling
-    console.log(res.status);
-    const xmlData = new window.DOMParser().parseFromString(data, 'text/xml');
-    console.log(xmlData);
-
-    const entry = xmlData.querySelector('entry');
-    const id = this.parseArXivId(entry.querySelector('id')?.textContent);
-    const paperTitle = entry.querySelector('title').textContent;
-    const abst = entry.querySelector('summary').textContent;
-    const authors = Array.from(entry.querySelectorAll('author')).map(
-      (author) => {
-        return author.textContent.trim();
-      }
-    );
-    const published = entry.querySelector('published').textContent;
-    const comment = entry.querySelector('comment')?.textContent ?? 'none';
-    this.setFormContents(paperTitle, abst, comment, authors);
-    return {
-      id: id,
-      title: paperTitle,
-      abst: abst,
-      authors: authors,
-      url: url,
-      published: published,
-      comment: comment,
-      publisher: 'arXiv',
-    };
-  }
-
-  async getOpenReviewInfo(url) {
-    this.showProgressBar();
-    const id = new URLSearchParams(new URL(url).search).get('id');
-
-    const res = await fetch(url);
-    const html = await res.text();
-    const parser = new DOMParser();
-    const xml = parser.parseFromString(html, 'text/html');
-
-    const authorsArray = Array.from(
-      xml.querySelectorAll('meta[name="citation_author"]'),
-      (author) => author.getAttribute('content')
-    );
-    const authors = authorsArray.length ? authorsArray : ['Anonymous'];
-
-    const paperTitle = xml
-      .querySelector('meta[name="citation_title"]')
-      .getAttribute('content');
-
-    const abst = xml
-      .querySelector('meta[name="citation_abstract"]')
-      .getAttribute('content');
-
-    const date = xml
-      .querySelector('meta[name="citation_publication_date"]')
-      .getAttribute('content');
-    // -> ISO 8601 date string
-    const published = new Date(date).toISOString().split('T')[0];
-    const comment = 'none';
-
-    this.setFormContents(paperTitle, abst, comment, authors);
-    return {
-      id: id,
-      title: paperTitle,
-      abst: abst,
-      authors: authors,
-      url: url,
-      published: published,
-      comment: comment,
-      publisher: 'OpenReview',
-    };
-  }
-
   renderMessage(type, message, overwrite = false) {
     // type: warning, danger, success, primary
     const template = `<div class="uk-alert-{{type}}" uk-alert><a class="uk-alert-close" uk-close></a><p>{{message}}</p></div>`;

From df7d0bfd49c8672fd3413028c6ad76839e3b330a Mon Sep 17 00:00:00 2001
From: denkiwakame <denkivvakame@gmail.com>
Date: Tue, 28 May 2024 14:27:07 +0900
Subject: [PATCH 2/3] add ACL anthology URL to allow CORS from chrome extension

---
 manifest.json | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/manifest.json b/manifest.json
index 69cc628..95a4a34 100644
--- a/manifest.json
+++ b/manifest.json
@@ -20,7 +20,8 @@
   "host_permissions": [
     "*://api.notion.com/*",
     "*://www.notion.so/*",
-    "*://openreview.net/*"
+    "*://openreview.net/*",
+    "*://aclanthology.org/*"
   ],
   "content_security_policy": {
     "extension_pages": "script-src 'self'; object-src 'self'"

From 191924b6359d05d5cb094c798420c760319d6132 Mon Sep 17 00:00:00 2001
From: denkiwakame <denkivvakame@gmail.com>
Date: Tue, 28 May 2024 14:27:46 +0900
Subject: [PATCH 3/3] add ACL anthology parser

---
 src/js/parsers.js | 96 +++++++++++++++++++++++++++++++++--------------
 src/js/popup.js   |  1 +
 2 files changed, 69 insertions(+), 28 deletions(-)

diff --git a/src/js/parsers.js b/src/js/parsers.js
index c4fe269..27e221c 100644
--- a/src/js/parsers.js
+++ b/src/js/parsers.js
@@ -18,15 +18,51 @@ class URLParser {
   }
 }
 
+// Looks up `url` in the arXiv export API and maps the first Atom <entry> to a
+// paper record; resolves to undefined (after logging) on a non-200 response.
+const arXivParser = async (url) => {
+  const ARXIV_API = 'http://export.arxiv.org/api/query/search_query';
+  // ref: https://info.arxiv.org/help/arxiv_identifier.html
+  // e.g. (new id format: 2404.16782) | (old id format: hep-th/0702063)
+  const parseArXivId = (str) => str?.match(/(\d+\.\d+$)|((\w|-)+\/\d+$)/)?.[0];
+
+  const paperId = parseArXivId(url);
+  if (!paperId) throw new Error(`Cannot extract an arXiv id from: ${url}`);
+  const res = await fetch(`${ARXIV_API}?id_list=${paperId}`);
+  if (res.status !== 200) {
+    console.error('arXiv API request failed');
+    return;
+  }
+  const data = await res.text();
+  const xmlData = new window.DOMParser().parseFromString(data, 'text/xml');
+  const entry = xmlData.querySelector('entry');
+  if (!entry) throw new Error(`arXiv API returned no entry for: ${paperId}`);
+  const id = parseArXivId(entry.querySelector('id')?.textContent);
+  const paperTitle = entry.querySelector('title').textContent;
+  const abst = entry.querySelector('summary').textContent;
+  const authorNodes = entry.querySelectorAll('author');
+  const authors = Array.from(authorNodes, (node) => node.textContent.trim());
+  const published = entry.querySelector('published').textContent;
+  const comment = entry.querySelector('comment')?.textContent ?? 'none';
+
+  return {
+    id: id,
+    title: paperTitle,
+    abst: abst,
+    authors: authors,
+    url: url,
+    published: published,
+    comment: comment,
+    publisher: 'arXiv',
+  };
+};
+
 const openReviewParser = async (url) => {
   const id = new URLSearchParams(new URL(url).search).get('id');
-  console.error(id);
-
   const res = await fetch(url);
   const html = await res.text();
   const parser = new DOMParser();
   const xml = parser.parseFromString(html, 'text/html');
-  console.error(xml);
 
   const authorsArray = Array.from(
     xml.querySelectorAll('meta[name="citation_author"]'),
@@ -61,33 +97,36 @@ const openReviewParser = async (url) => {
   };
 };
 
-const arXivParser = async (url) => {
-  const ARXIV_API = 'http://export.arxiv.org/api/query/search_query';
-  // ref: https://info.arxiv.org/help/arxiv_identifier.html
-  // e.g. (new id format: 2404.16782) | (old id format: hep-th/0702063)
-  const parseArXivId = (str) => str.match(/(\d+\.\d+$)|((\w|-)+\/\d+$)/)?.[0];
+const aclAnthologyParser = async (url) => { // builds a paper record from the citation_* meta tags of the page at `url`
+  const res = await fetch(url);
+  const html = await res.text();
+  const parser = new DOMParser();
+  const xml = parser.parseFromString(html, 'text/html');
 
-  const paperId = parseArXivId(url);
-  const res = await fetch(ARXIV_API + '?id_list=' + paperId.toString());
-  if (res.status != 200) {
-    console.error('arXiv API request failed');
-    return;
-  }
-  const data = await res.text(); // TODO: error handling
-  console.log(res.status);
-  const xmlData = new window.DOMParser().parseFromString(data, 'text/xml');
-  console.log(xmlData);
+  const id = xml
+    .querySelector('meta[name="citation_doi"]') // NOTE(review): null (then a TypeError below) if the page has no citation_doi tag — confirm every page has one
+    .getAttribute('content');
+  const authors = Array.from(
+    xml.querySelectorAll('meta[name="citation_author"]'),
+    (author) => author.getAttribute('content')
+  );
 
-  const entry = xmlData.querySelector('entry');
-  const id = parseArXivId(entry.querySelector('id')?.textContent);
-  const paperTitle = entry.querySelector('title').textContent;
-  const abst = entry.querySelector('summary').textContent;
-  const authors = Array.from(entry.querySelectorAll('author')).map((author) => {
-    return author.textContent.trim();
-  });
-  const published = entry.querySelector('published').textContent;
-  const comment = entry.querySelector('comment')?.textContent ?? 'none';
+  const paperTitle = xml
+    .querySelector('meta[name="citation_title"]')
+    .getAttribute('content');
 
+  const abst = 'none'; // the abstract is not scraped; fixed placeholder
+  const date = xml
+    .querySelector('meta[name="citation_publication_date"]')
+    .getAttribute('content');
+  // keep only the YYYY-MM-DD part of the ISO 8601 (UTC) timestamp
+  const published = new Date(date).toISOString().split('T')[0];
+  const publisher = xml
+    .querySelectorAll('.acl-paper-details dd')[6] // NOTE(review): assumes the 7th <dd> holds the publisher — confirm against the page layout
+    .textContent.replaceAll('\n', '');
+  const comment = xml // the PDF link is stored in the comment field
+    .querySelector('meta[name="citation_pdf_url"]')
+    .getAttribute('content');
   return {
     id: id,
     title: paperTitle,
@@ -96,12 +135,13 @@ const arXivParser = async (url) => {
     url: url,
     published: published,
     comment: comment,
-    publisher: 'arXiv',
+    publisher: publisher,
   };
 };
 
 const urlParser = new URLParser();
 urlParser.addParser('https://openreview.net/', openReviewParser);
 urlParser.addParser('https://arxiv.org', arXivParser);
+urlParser.addParser('https://aclanthology.org', aclAnthologyParser);
 
 export default urlParser;
diff --git a/src/js/popup.js b/src/js/popup.js
index 47e1b0e..efad053 100644
--- a/src/js/popup.js
+++ b/src/js/popup.js
@@ -12,6 +12,7 @@ import urlParser from './parsers.js';
 UIKit.use(Icons);
 
 const TEST_URL = 'https://arxiv.org/abs/2308.04079';
+// const TEST_URL = 'https://aclanthology.org/2023.ijcnlp-main.1/';
 
 class UI {
   constructor() {