Merge pull request #7088 from ethereum/markdownCheckr

minimalsm · web-flow · commit 6c8b68fc697f · 2022-08-08T21:33:48.000+01:00
Add a markdown checker
diff --git a/package.json b/package.json
@@ -85,6 +85,7 @@
     "@types/styled-system": "^5.1.15",
     "babel-preset-gatsby": "^2.14.0",
     "github-slugger": "^1.3.0",
+    "gray-matter": "^4.0.3",
     "husky": "^4.2.5",
     "identity-obj-proxy": "^3.0.0",
     "minimist": "^1.2.6",
@@ -102,6 +103,7 @@
     "crowdin-clean": "rm -rf .crowdin && mkdir .crowdin",
     "crowdin-import": "ts-node src/scripts/crowdin-import.ts",
     "format": "prettier --write \"**/*.{js,jsx,json,md}\"",
+    "markdown-checker": "node src/scripts/markdown-checker.js",
     "generate-heading-ids": "ts-node --esm src/scripts/generateHeadingIds.mts",
     "start": "gatsby develop",
     "start:lambda": "netlify-lambda serve src/lambda",
diff --git a/src/content/community/get-involved/index.md b/src/content/community/get-involved/index.md
@@ -100,7 +100,6 @@ The Ethereum ecosystem is on a mission to fund public goods and impactful projec
 - [Web3 Army](https://web3army.xyz/)
 - [Crypto Valley Jobs](https://cryptovalley.jobs/)
 
-
 ## Join a DAO {#decentralized-autonomous-organizations-daos}
 
 "DAOs" are decentralized autonomous organizations. These groups leverage Ethereum technology to facilitate organization and collaboration. For instance, for controlling membership, voting on proposals, or managing pooled assets. While DAOs are still experimental, they offer opportunities for you to find groups that you identify with, find collaborators, and grow your impact on the Ethereum community. [More on DAOs](/dao/)
diff --git a/src/content/community/research/index.md b/src/content/community/research/index.md
@@ -392,7 +392,7 @@ Decentralizing the entire Ethereum tech stack is an important research area. Cur
 
 #### Background reading {#background-reading-20}
 
-- [Ethereum stack](/developers/docs/ethereum-stack/) 
+- [Ethereum stack](/developers/docs/ethereum-stack/)
 - [Coinbase: Intro to Web3 Stack](https://blog.coinbase.com/a-simple-guide-to-the-web3-stack-785240e557f0)
 - [Introduction to smart contracts](/developers/docs/smart-contracts/)
 - [Introduction to decentralized storage](/developers/docs/storage/)
diff --git a/src/content/desci/index.md b/src/content/desci/index.md
@@ -22,8 +22,8 @@ DeSci aims to create an ecosystem where scientists are incentivized to openly sh
 Decentralized science allows for more diverse funding sources (from [DAOs](/dao/), [quadratic donations](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2003531) to crowdfunding and more), more accessible access data and methods, and by providing incentives for reproducibility.
 
 ### Juan Benet - DeSci, Independent Labs, & Large Scale Data Science
-<iframe width="560" height="315" src="https://www.youtube.com/embed/zkXM9H90g_E" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
 
+<iframe width="560" height="315" src="https://www.youtube.com/embed/zkXM9H90g_E" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
 
 ## How DeSci improves science {#desci-improves-science}
 
diff --git a/src/content/developers/docs/nodes-and-clients/nodes-as-a-service/index.md b/src/content/developers/docs/nodes-and-clients/nodes-as-a-service/index.md
@@ -167,11 +167,11 @@ Here is a list of some of the most popular Ethereum node providers, feel free to
   - [Docs](https://documenter.getpostman.com/view/13630829/TVmFkLwy)
   - Features
     - Access to 50+ blockchain nodes
-    - Free API Key 
+    - Free API Key
     - Block Explorers
     - API Response Time ⩽ 1 sec
     - 24/7 Support Team
-    - Personal Account Manager 
+    - Personal Account Manager
     - Shared, archive, backup and dedicated nodes
 - [**Pocket Network**](https://www.pokt.network/)
   - [Docs](https://docs.pokt.network/home/)
diff --git a/src/content/developers/docs/programming-languages/python/index.md b/src/content/developers/docs/programming-languages/python/index.md
@@ -82,7 +82,7 @@ The following Ethereum-based projects use tools mentioned on this page. The rela
 ## Python Community discussion {#python-community-contributors}
 
 - [Ethereum Python Community Discord](https://discord.gg/9zk7snTfWe) for Web3.py and other Python framework discussion
-- [Vyper Discord]([https://discord.gg/9zk7snTfWe](https://discord.gg/SdvKC79cJk)) for Vyper smart contract programming disucssion
+- [Vyper Discord](https://discord.gg/SdvKC79cJk) for Vyper smart contract programming discussion
 
 ## Other aggregated lists {#other-aggregated-lists}
 
diff --git a/src/content/developers/docs/smart-contracts/formal-verification/index.md b/src/content/developers/docs/smart-contracts/formal-verification/index.md
diff --git a/src/content/developers/docs/smart-contracts/languages/index.md b/src/content/developers/docs/smart-contracts/languages/index.md
@@ -113,11 +113,10 @@ For more information, [read the Vyper rationale](https://vyper.readthedocs.io/en
 - [Smart contract development frameworks and tools for Vyper](/developers/docs/programming-languages/python/)
 - [VyperPunk - learn to secure and hack Vyper smart contracts](https://github.com/SupremacyTeam/VyperPunk)
 - [VyperExamples - Vyper vulnerability examples](https://www.vyperexamples.com/reentrancy)
-- [Vyper Hub for development](https://github.com/zcor/vyper-dev) 
+- [Vyper Hub for development](https://github.com/zcor/vyper-dev)
 - [Vyper greatest hits smart contract examples](https://github.com/pynchmeister/vyper-greatest-hits/tree/main/contracts)
 - [Awesome Vyper curated resources](https://github.com/spadebuilders/awesome-vyper)
 
-
 ### Example {#example}
 
 ```python
diff --git a/src/content/translations/fa/developers/docs/intro-to-ether/index.md b/src/content/translations/fa/developers/docs/intro-to-ether/index.md
@@ -42,7 +42,7 @@ sidebar: true
 سوختن اتر در تمام تراکنش‌ها روی اتریوم رخ می‌دهد. وقتی هزینه تراکنش کاربران پرداخت می شود، یک هزینه پایه با توجه به تقاضای شبکه ثبت شده و از چرخه خارج می شود که به همراه حداکثر کارمزد گاز و اندازه متغیر [بلوک] (https://etherscan.io/block/12965263)، کارمزد نهایی تراکنش را مشخص می کند. وقتی تقاضای شبکه زیاد باشد، میزان اتر سوزانده شده از آنچه که استخراج می شود بیشتر شده و از تولید مقدار زیاد آن جلوگیری می گند.
 
 سوزاندن کارمزد پایه از راه‌های مختلفی که ماینرها می‌توانند از آن برای دستکاری شبکه استفاده کنند، جلوگیری می‌کند. برای مثال اگر ماینرها کارمزد پایه را دریافت می کردند، می توانستند تراکنش های خود را به صورت رایگان درج کنند و کارمزد پایه را برای بقیه افزایش دهند. از طرف دیگر، آنها می توانند کارمزد پایه را به برخی از کاربران خارج از زنجیره بازپرداخت کنند، که منجر به بازار کارمزد تراکنش مبهم و پیچیده تر می شود.
- 
+
 ## واحدهای خرد اتر {#denominations}
 
 از آنجایی که بسیاری از تراکنش‌ها در اتریوم کوچک هستند، اتر دارای چندین واحد شمارش است که ممکن است برای مقادیر کمتر به آن‌ها اشاره شود. از میان این واحدهای شمارش، Wei و gwei از اهمیت ویژه‌ای برخوردارند.
diff --git a/src/scripts/markdown-checker.js b/src/scripts/markdown-checker.js
@@ -0,0 +1,219 @@
+const fs = require("fs")
+const path = require("path")
+const matter = require("gray-matter")
+const argv = require("minimist")(process.argv.slice(2))
+
+// Optional `--lang` command-line value; when set, only files sorted into that
+// language are kept for checking.
+const LANG_ARG = argv.lang || null
+const PATH_TO_INTL_MARKDOWN = "./src/content/translations/"
+const PATH_TO_ALL_CONTENT = "./src/content/"
+// Shape of a `YYYY-MM-DD` date. Unanchored: it matches anywhere in the tested string.
+const TUTORIAL_DATE_REGEX = new RegExp("\\d{4}-\\d{2}-\\d{2}")
+// Link text that starts or ends with whitespace: `[ text](` or `[text ](`.
+// There must be no spaces around the `|`: inside a pattern they are literal
+// characters, so the check would only fire when the markdown also had a space
+// after `(` or before `[`.
+const WHITE_SPACE_IN_LINK_TEXT = new RegExp(
+  "\\[\\s.+\\]\\(|\\[.+\\s\\]\\(",
+  "g"
+)
+// A link whose parenthesised target contains whitespace, e.g. `[text](/some path/)`.
+const BROKEN_LINK_REGEX = new RegExp(
+  "\\[[^\\]]+\\]\\([^\\)\\s]+\\s[^\\)]+\\)",
+  "g"
+)
+// Closing-tag fragments looked for by the HTML-in-markdown check.
+const HTML_TAGS = ["</code", "</p>"]
+// Misspellings of "Ethereum". The default spelling check compiles these with
+// the case-insensitive flag.
+const SPELLING_MISTAKES = [
+  "Ethreum",
+  "Etherum",
+  "Etherium",
+  "Etheruem",
+  "Etereum",
+  "Eterium",
+  "Etherem",
+  "Etheerum",
+  "Ehtereum",
+  "Eferum",
+]
+// Spellings that are only wrong in this exact casing.
+// NOTE(review): the identifier itself is misspelled ("SENSITVE"); it is kept
+// as-is because renaming it is outside the scope of this block.
+const CASE_SENSITVE_SPELLING_MISTAKES = ["Thereum", "Metamask", "Github"]
+// Ideas:
+// Regex for explicit lang path (e.g. /en/) && for glossary links (trailing slash breaks links e.g. /glossary/#pos/ doesn't work)
+// We should have case sensitive spelling mistakes && check they are not in links.
+
+// Every entry of the translations directory is treated as a language code;
+// English content lives outside that directory, so "en" is added by hand.
+const langsArray = fs.readdirSync(PATH_TO_INTL_MARKDOWN)
+langsArray.push("en")
+
+/**
+ * Recursively collects the paths of all markdown (`.md`) files below a directory.
+ *
+ * @param {string} dirPath - Directory to walk.
+ * @param {string[]} [arrayOfMarkdownPaths=[]] - Accumulator; found paths are appended to it.
+ * @returns {string[]} The accumulator, holding one `path.join`-normalized path per markdown file.
+ */
+function getAllMarkdownPaths(dirPath, arrayOfMarkdownPaths = []) {
+  // An explicit `null` bypasses the default parameter, so keep a fallback.
+  const markdownPaths = arrayOfMarkdownPaths || []
+
+  for (const entry of fs.readdirSync(dirPath)) {
+    const entryPath = path.join(dirPath, entry)
+
+    if (fs.statSync(entryPath).isDirectory()) {
+      getAllMarkdownPaths(entryPath, markdownPaths)
+    } else if (entry.endsWith(".md")) {
+      // Test the file name's ending only. A substring test on the whole path
+      // also accepts names such as `notes.md.bak` and every file that lives
+      // inside a directory whose name contains ".md".
+      markdownPaths.push(entryPath)
+    }
+  }
+
+  return markdownPaths
+}
+
+/**
+ * Groups markdown file paths by language.
+ *
+ * A file is a translation when its path contains "/translations/"; its
+ * language is the directory name that directly follows that segment. Every
+ * other file is treated as English ("en"). When LANG_ARG is set, only files
+ * of that language are kept.
+ *
+ * @param {string[]} files - Markdown file paths.
+ * @returns {Object<string, string[]>} One array of paths per language. Every
+ *   entry of `langsArray` is present as a key, even when its array is empty.
+ */
+function sortMarkdownPathsIntoLanguages(files) {
+  const TRANSLATIONS_SEGMENT = "/translations/"
+  const languages = {}
+
+  for (const knownLang of langsArray) {
+    languages[knownLang] = []
+  }
+
+  for (const file of files) {
+    // Paths built with path.join use backslashes on Windows; compare on a
+    // forward-slash copy but store the path exactly as it was given.
+    const comparablePath = file.replace(/\\/g, "/")
+    const segmentIndex = comparablePath.indexOf(TRANSLATIONS_SEGMENT)
+
+    // Read the whole directory name instead of slicing a fixed number of
+    // characters, so codes of any length ("fil", "pt-br", ...) stay intact and
+    // a language-like substring deeper in the path cannot change the result.
+    const lang =
+      segmentIndex === -1
+        ? "en"
+        : comparablePath
+            .slice(segmentIndex + TRANSLATIONS_SEGMENT.length)
+            .split("/")[0]
+
+    if (LANG_ARG && LANG_ARG !== lang) {
+      continue
+    }
+
+    // A language that is missing from langsArray gets its own bucket rather
+    // than crashing on `undefined.push`.
+    if (!Object.prototype.hasOwnProperty.call(languages, lang)) {
+      languages[lang] = []
+    }
+    languages[lang].push(file)
+  }
+
+  return languages
+}
+
+/**
+ * Validates the frontmatter of one markdown file and logs every problem found.
+ *
+ * Checks performed:
+ * - `title` is present (warning);
+ * - `lang` is present and equal to the expected language (error);
+ * - for paths containing "/tutorials/": `published` is present and is a
+ *   valid date (warning).
+ *
+ * @param {string} path - Path of the markdown file. Note that this parameter
+ *   shadows the `path` module inside the function.
+ * @param {string} lang - Language code the file is expected to declare.
+ */
+function processFrontmatter(path, lang) {
+  const frontmatter = matter(fs.readFileSync(path, "utf-8")).data
+
+  if (!frontmatter.title) {
+    console.warn(`Missing 'title' frontmatter at ${path}:`)
+  }
+  // Description commented out as there are a lot of them missing :-)!
+  // if (!frontmatter.description) {
+  //   console.warn(`Missing 'description' frontmatter at ${path}:`)
+  // }
+  if (!frontmatter.lang) {
+    console.error(`Missing 'lang' frontmatter at ${path}: Expected: '${lang}'`)
+  } else if (frontmatter.lang !== lang) {
+    console.error(
+      `Invalid 'lang' frontmatter at ${path}: Expected: '${lang}'. Received: '${frontmatter.lang}'.`
+    )
+  }
+
+  // Only tutorials are required to carry a publication date.
+  if (!path.includes("/tutorials/")) {
+    return
+  }
+
+  const { published } = frontmatter
+
+  if (!published) {
+    console.warn(`Missing 'published' frontmatter at ${path}:`)
+    return
+  }
+
+  // The value is expected to be a Date (presumably what the frontmatter
+  // parser produces for an unquoted YYYY-MM-DD value — confirm against
+  // gray-matter). Anything else, including an Invalid Date, is reported
+  // explicitly instead of relying on `toISOString()` throwing.
+  const isValidDate =
+    published instanceof Date && !Number.isNaN(published.getTime())
+  const dateIsFormattedCorrectly =
+    isValidDate &&
+    TUTORIAL_DATE_REGEX.test(published.toISOString().slice(0, 10))
+
+  if (!dateIsFormattedCorrectly) {
+    console.warn(
+      `Invalid 'published' frontmatter at ${path}: Expected: 'YYYY-MM-DD' Received: ${frontmatter.published}`
+    )
+  }
+}
+
+/**
+ * Runs the enabled content checks on one markdown file: links matched by
+ * BROKEN_LINK_REGEX and the spellings listed in SPELLING_MISTAKES. Findings
+ * are written with console.warn; nothing is returned.
+ *
+ * @param {string} path - Path of the markdown file to read and check.
+ */
+function processMarkdown(path) {
+  const contents = fs.readFileSync(path, "utf-8")
+
+  for (const { index } of contents.matchAll(BROKEN_LINK_REGEX)) {
+    const position = getLineNumber(contents, index)
+    console.warn(`Broken link found: ${path}:${position}`)
+  }
+
+  // TODO: refactor history pages to use a component for network upgrade summaries
+  // TODO: create .env commit warning component for tutorials
+  // Ignore tutorials with Javascript and ExpandableCards
+  /* Commented this out due to console noise (but they are things we should fix!)
+  if (!(path.includes("/history/")) && !(contents.includes("```javascript")) && !(contents.includes("ExpandableCard"))) {
+    for (const tag of HTML_TAGS) {
+
+      const htmlTagRegex = new RegExp(tag, "g")
+      let htmlTagMatch
+
+      while ((htmlTagMatch = htmlTagRegex.exec(contents))) {
+        const lineNumber = getLineNumber(contents, htmlTagMatch.index)
+        console.warn(`Warning: ${tag} tag in markdown at ${path}:${lineNumber}`)
+
+        if (!htmlTagRegex.global) break
+      }
+    }
+  }
+  */
+
+  // Commented out as 296 instances of whitespace in link texts
+  // let whiteSpaceInLinkTextMatch
+
+  // while ((whiteSpaceInLinkTextMatch = WHITE_SPACE_IN_LINK_TEXT.exec(contents))) {
+  //   const lineNumber = getLineNumber(contents, whiteSpaceInLinkTextMatch.index)
+  //   console.warn(`White space in link found: ${path}:${lineNumber}`)
+  // }
+
+  checkMarkdownSpellingMistakes(path, contents, SPELLING_MISTAKES)
+  // Turned this off for testing as there are lots of Github (instead of GitHub) and Metamask (instead of MetaMask).
+  // checkMarkdownSpellingMistakes(path, contents, CASE_SENSITVE_SPELLING_MISTAKES, true)
+}
+
+/**
+ * Warns about every occurrence of each listed misspelling in a file's text.
+ *
+ * Entries are matched as literal text, not as regular-expression source.
+ *
+ * @param {string} path - Path of the file, used only in the warning message.
+ * @param {string} file - Full text of the file.
+ * @param {string[]} spellingMistakes - Misspellings to search for.
+ * @param {boolean} [caseSensitive=false] - When false, matching ignores case.
+ */
+function checkMarkdownSpellingMistakes(
+  path,
+  file,
+  spellingMistakes,
+  caseSensitive = false
+) {
+  const flags = caseSensitive ? "g" : "gi"
+
+  for (const mistake of spellingMistakes) {
+    // An empty entry would match at every position; there is nothing to report.
+    if (mistake === "") {
+      continue
+    }
+
+    // Escape regex metacharacters so entries such as "c++" or "web3.js" are
+    // searched for verbatim instead of throwing or acting as wildcards.
+    const escapedMistake = mistake.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
+    const mistakeRegex = new RegExp(escapedMistake, flags)
+
+    for (const { index } of file.matchAll(mistakeRegex)) {
+      const lineNumber = getLineNumber(file, index)
+      console.warn(
+        `Spelling mistake "${mistake}" found at ${path}:${lineNumber}`
+      )
+    }
+  }
+}
+
+/**
+ * Converts a character offset into a 1-based "line:column" position string.
+ *
+ * @param {string} file - Full text the offset refers to.
+ * @param {number} index - Character offset into `file`.
+ * @returns {string} Position formatted as `<line>:<column>`.
+ */
+function getLineNumber(file, index) {
+  const textBefore = file.substring(0, index)
+
+  // The line number is one more than the count of newlines before the offset.
+  let line = 1
+  let newlineAt = textBefore.indexOf("\n")
+  while (newlineAt !== -1) {
+    line += 1
+    newlineAt = textBefore.indexOf("\n", newlineAt + 1)
+  }
+
+  // lastIndexOf gives -1 when there is no newline, which makes the column
+  // `textBefore.length + 1`, i.e. a 1-based column on the first line.
+  const column = textBefore.length - textBefore.lastIndexOf("\n")
+
+  return `${line}:${column}`
+}
+
+/**
+ * Entry point: finds every markdown file under PATH_TO_ALL_CONTENT, groups
+ * the files by language, then runs the frontmatter and content checks on
+ * each file.
+ */
+function checkMarkdown() {
+  const pathsByLang = sortMarkdownPathsIntoLanguages(
+    getAllMarkdownPaths(PATH_TO_ALL_CONTENT)
+  )
+
+  Object.entries(pathsByLang).forEach(([lang, paths]) => {
+    paths.forEach((markdownPath) => {
+      processFrontmatter(markdownPath, lang)
+      processMarkdown(markdownPath)
+    })
+  })
+}
+
+checkMarkdown()
diff --git a/yarn.lock b/yarn.lock
@@ -9507,7 +9507,7 @@ graphql@^15.7.2:
   resolved "https://registry.yarnpkg.com/graphql/-/graphql-15.8.0.tgz#33410e96b012fa3bdb1091cc99a94769db212b38"
   integrity sha512-5gghUc24tP9HRznNpV2+FIoq3xKkj5dTQqf4v0CpdPbFVwFkWoxOM+o+2OC9ZSvjEMTjfmG9QT+gcvggTwW1zw==
 
-gray-matter@^4.0.2:
+gray-matter@^4.0.2, gray-matter@^4.0.3:
   version "4.0.3"
   resolved "https://registry.yarnpkg.com/gray-matter/-/gray-matter-4.0.3.tgz#e893c064825de73ea1f5f7d88c7a9f7274288798"
   integrity sha512-5v6yZd4JK3eMI3FqqCouswVqwugaA9r4dNZB1wwcmrD02QkV5H0y7XBQW8QwQqEaZY1pM9aqORSORhJRdNK44Q==