harpocrates
diff --git a/‎.gitignore
Lines changed: 22 additions & 0 deletions b/‎.gitignore
Lines changed: 22 additions & 0 deletions
diff --git a/‎aa.rs
Lines changed: 2 additions & 0 deletions b/‎aa.rs
Lines changed: 2 additions & 0 deletions
diff --git a/‎bench/README.md
Lines changed: 35 additions & 0 deletions b/‎bench/README.md
Lines changed: 35 additions & 0 deletions
diff --git a/‎benchmarks/allocation-benchmarks/Main.hs renamed to ‎bench/allocation-benchmarks/Main.hs
Lines changed: 9 additions & 4 deletions b/‎benchmarks/allocation-benchmarks/Main.hs renamed to ‎bench/allocation-benchmarks/Main.hs
Lines changed: 9 additions & 4 deletions
diff --git a/‎bench.py renamed to ‎bench/bench.py
Lines changed: 44 additions & 2 deletions b/‎bench.py renamed to ‎bench/bench.py
Lines changed: 44 additions & 2 deletions
diff --git a/‎benchmarks/timing-benchmarks/Main.hs renamed to ‎bench/timing-benchmarks/Main.hs
Lines changed: 15 additions & 7 deletions b/‎benchmarks/timing-benchmarks/Main.hs renamed to ‎bench/timing-benchmarks/Main.hs
Lines changed: 15 additions & 7 deletions
diff --git a/‎get-rust-sources.sh
Lines changed: 68 additions & 0 deletions b/‎get-rust-sources.sh
Lines changed: 68 additions & 0 deletions
diff --git a/‎language-rust.cabal
Lines changed: 5 additions & 4 deletions b/‎language-rust.cabal
Lines changed: 5 additions & 4 deletions
diff --git a/‎src/Language/Rust/Parser.hs
Lines changed: 3 additions & 4 deletions b/‎src/Language/Rust/Parser.hs
Lines changed: 3 additions & 4 deletions
@@ -0,0 +1,22 @@
+# Stack related files
+.stack-work/
+stack.yaml
+
+# Benchmark output folders
+bench/allocations/
+bench/timings/
+
+# Sample source files: ignore everything inside the folder except the hand-written samples listed
+# below. The pattern must be 'sample-sources/*' and not 'sample-sources/': git cannot re-include a
+# file whose parent directory is itself excluded, so the '!' rules would otherwise be ignored.
+sample-sources/*
+!sample-sources/attributes.rs
+!sample-sources/empty.rs
+!sample-sources/expressions.rs
+!sample-sources/items.rs
+!sample-sources/let.rs
+!sample-sources/literals.rs
+!sample-sources/macros.rs
+!sample-sources/patterns.rs
+!sample-sources/precedences.rs
+!sample-sources/statement-expressions.rs
+!sample-sources/statements.rs
+!sample-sources/types.rs
@@ -0,0 +1,2 @@
+// Empty module named `foo`.
+// NOTE(review): this file (`aa.rs`, at the repository root) looks like a scratch parser input
+// that was committed by accident — confirm whether it belongs in the repository.
+mod foo {}
+
@@ -0,0 +1,35 @@
+We have two types of benchmarks. If you are using `stack` you can run them with
+
+```
+$ stack bench                         # runs all benchmarks
+$ stack bench :allocation-benchmarks  # runs allocation benchmarks only (faster)
+$ stack bench :timing-benchmarks      # runs timing benchmarks only (slower)
+```
+
+## `allocation-benchmarks`
+
+Benchmarks how much memory is allocated by the runtime when parsing the files inside of the
+`sample-sources` directory at the project root. Resulting information is stored in a JSON file in
+the `allocations` folder (automatically created in this directory).
+
+## `timing-benchmarks`
+
+Benchmarks how long it takes to parse the files inside the `sample-sources` directory. Resulting
+information is stored in a JSON file in the `timings` folder (automatically created in this
+directory).
+
+# Tools
+
+Since some of these tests take a while, you can add a `.benchignore` file in `sample-sources` which
+lists files to skip for benchmarking (one file name per line). 
+
+There is also a `bench.py` utility in this directory which lets you compare benchmarks across
+different commits. It relies on the JSON files in `allocations` and `timings`, so you will have to
+checkout and run the benchmarks on commits you want to compare against (to generate the 
+corresponding JSON file).
+
+```
+$ ./bench.py --folder allocations      # compare the last several commits for allocations
+$ ./bench.py --folder timings          # compare the last several commits for timings
+```
+
@@ -32,11 +32,16 @@ main = do
   -- Get the test cases
   workingDirectory <- getCurrentDirectory
   let sampleSources = workingDirectory </> "sample-sources"
+      benchIgnore = sampleSources </> ".benchignore"
+  benchIgnoreExists <- doesFileExist benchIgnore
+  ignore <- if benchIgnoreExists
+              then (\f -> map (sampleSources </>) (lines f)) <$> readFile benchIgnore
+              else pure []
   entries <- map (sampleSources </>) <$> listDirectory sampleSources
-  files <- filterM doesFileExist entries
+  files <- filterM doesFileExist (filter (`notElem` ignore) entries)
 
   -- Clear out previous WIP (if there is one)
-  catch (removeFile (workingDirectory </> "allocations" </> "WIP" <.> "json"))
+  catch (removeFile (workingDirectory </> "bench" </> "allocations" </> "WIP" <.> "json"))
         (\e -> if isDoesNotExistError e then pure () else throwIO e)
 
   -- Run 'weigh' tests
@@ -57,8 +62,8 @@ main = do
                        ]
 
   -- Save the output to JSON
-  createDirectoryIfMissing False (workingDirectory </> "allocations")
-  let logFile = workingDirectory </> "allocations" </> logFileName <.> "json"
+  createDirectoryIfMissing False (workingDirectory </> "bench" </> "allocations")
+  let logFile = workingDirectory </> "bench" </> "allocations" </> logFileName <.> "json"
   putStrLn $ "writing results to: " ++ logFile
   logFile `BL.writeFile` encode results
 
@@ -58,12 +58,55 @@ def flattenListDict(d, indent=0):
 
 # Currently not used...
 def fmtSize(num):
+    """format a number of bytes on disk into a human readable form
+
+    The value is divided by 1024 until it drops below 1024 and is printed
+    with one decimal and the matching unit (plain bytes carry no unit).
+    Anything too large for 'ZB' is reported in 'YB'.
+    """
     for unit in ['','KB','MB','GB','TB','PB','EB','ZB']:
         if abs(num) < 1024.0:
             return "%3.1f%s" % (num, unit)
         num /= 1024.0
-    return "%.1f%s%s" % (num, 'YB', suffix)
+    # The units above already end in 'B'; the former extra `suffix` argument
+    # was never defined in this function and raised NameError on this path.
+    return "%.1f%s" % (num, 'YB')
 
+def revParse(commit, useAbbreviated=False):
+    """get the hash for a commit
+
+    commit         -- anything `git rev-parse` accepts (hash, branch, tag, ...)
+    useAbbreviated -- when true, prefer the output of
+                      `git rev-parse --abbrev-ref` and only fall back to the
+                      plain `git rev-parse` output if that one is empty
+
+    Raises subprocess.CalledProcessError when git exits with an error.
+    """
+    def gitRevParse(*flags):
+        """run `git rev-parse` with the given flags and return its output"""
+        return subprocess.run(
+            ["git", "rev-parse", *flags, commit],
+            stdout=subprocess.PIPE,
+            check=True
+        ).stdout.decode("utf8").strip()
+
+    # Only pay for the extra git process when its answer can be used
+    if useAbbreviated:
+        abbreviated = gitRevParse("--abbrev-ref")
+        if abbreviated:
+            return abbreviated
+
+    return gitRevParse()
+
+# Run benchmarks for a commit
+def runBenchmarks(commit):
+    """temporarily check out the given commit to run the benchmarks
+
+    Tracked local changes are stashed before the checkout and popped again
+    afterwards. The checkout that was active on entry is restored even when
+    the target checkout or the benchmark run fails.
+
+    Raises subprocess.CalledProcessError when one of the git commands fails.
+    """
+
+    print("Running benchmarks for '" + commit + "'")
+    commit = revParse(commit)
+    print('\033[31m' + "Do not make any changes to files!" + '\033[0m')
+
+    # Remember the branch name when there is one, so that we do not come back
+    # on a detached HEAD. "HEAD" means we were already detached, in which case
+    # only the hash says where to return to.
+    init = revParse("HEAD", useAbbreviated=True)
+    if init == "HEAD":
+        init = revParse("HEAD")
+
+    # `git stash` only saves tracked changes, so only look for those. The
+    # output is bytes; it is empty when there is nothing to stash.
+    localChanges = b"" != subprocess.run(
+        ["git", "status", "--porcelain", "--untracked-files=no"],
+        stdout=subprocess.PIPE,
+        check=True
+    ).stdout.strip()
+
+    if localChanges:
+        subprocess.run(["git", "stash"], stdout=subprocess.PIPE, check=True)
+
+    try:
+        subprocess.run(["git", "checkout", commit], check=True)
+        subprocess.run(["stack", "bench"])
+    finally:
+        # If this checkout fails the exception propagates and the stash is
+        # left in place rather than being popped onto the wrong commit.
+        subprocess.run(["git", "checkout", init], check=True)
+        if localChanges:
+            subprocess.run(["git", "stash", "pop"], stdout=subprocess.PIPE, check=True)
+
+    print('\033[32m' + "Back to initial state" + '\033[0m')
+
 
 if __name__ == "__main__":
     # Argument parser
@@ -84,8 +127,7 @@ def fmtSize(num):
     sanitized = ["WIP"]
     for commit in commits[1:]:
         try:
-            c = subprocess.check_output(["git", "rev-parse", commit]).decode("utf-8").strip()
-            sanitized.append(c)
+            sanitized.append(revParse(commit))
         except:
             print('Invalid commit "' + commit + '"')
 
 
@@ -2,11 +2,12 @@
 
 import Criterion
 import Criterion.Main (defaultConfig)
-import Criterion.Types (anMean, reportAnalysis, timeLimit, anOutlierVar, ovEffect, OutlierEffect(Severe))
+import Criterion.Types (anMean, reportAnalysis, timeLimit, anOutlierVar, ovEffect, OutlierEffect(Moderate))
 import Statistics.Resampling.Bootstrap (Estimate(..))
 
 import Control.Monad (filterM)
 import Control.Exception (catch, throwIO)
+import Data.Foldable (for_)
 import Data.Traversable (for)
 import GHC.Exts (fromString)
 
@@ -32,32 +33,39 @@ main = do
   -- Get the test cases
   workingDirectory <- getCurrentDirectory
   let sampleSources = workingDirectory </> "sample-sources"
+      benchIgnore = sampleSources </> ".benchignore"
+  benchIgnoreExists <- doesFileExist benchIgnore
+  ignore <- if benchIgnoreExists
+              then (\f -> map (sampleSources </>) (lines f)) <$> readFile benchIgnore
+              else pure []
   entries <- map (sampleSources </>) <$> listDirectory sampleSources
-  files <- filterM doesFileExist entries
+  files <- filterM doesFileExist (filter (`notElem` ignore) entries)
 
   -- Clear out previous WIP (if there is one)
-  catch (removeFile (workingDirectory </> "timings" </> "WIP" <.> "json"))
+  catch (removeFile (workingDirectory </> "bench" </> "timings" </> "WIP" <.> "json"))
         (\e -> if isDoesNotExistError e then pure () else throwIO e)
 
   -- Run 'criterion' tests
   reports <- for files $ \f -> do
     let name = takeFileName f
     putStrLn name
     is <- readInputStream f
-    bnch <- benchmarkWith' defaultConfig{ timeLimit = 15 } (nf (parse' @(SourceFile Span)) is)
+    bnch <- benchmarkWith' defaultConfig{ timeLimit = 20 } (nf (parse' @(SourceFile Span)) is)
     pure (name, bnch)
   let results = object [ fromString name .= object [ "mean" .= m
                                                    , "lower bound" .= l
                                                    , "upper bound" .= u
                                                    ]
                        | (name,report) <- reports
                        , let Estimate m l u _ = anMean (reportAnalysis report)
-                       , ovEffect (anOutlierVar (reportAnalysis report)) /= Severe
+                       , ovEffect (anOutlierVar (reportAnalysis report)) < Moderate
                        ]
+  for_ [ name | (name,report) <- reports, ovEffect (anOutlierVar (reportAnalysis report)) >= Moderate ] $ \n ->
+    putStrLn $ "Benchmark for `" ++ n ++ "' will not be considered since it was inflated"
 
   -- Save the output to JSON
-  createDirectoryIfMissing False (workingDirectory </> "timings")
-  let logFile = workingDirectory </> "timings" </> logFileName <.> "json"
+  createDirectoryIfMissing False (workingDirectory </> "bench" </> "timings")
+  let logFile = workingDirectory </> "bench" </> "timings" </> logFileName <.> "json"
   putStrLn $ "writing results to: " ++ logFile
   logFile `BL.writeFile` encode results
 
@@ -0,0 +1,68 @@
+#!/bin/sh
+#
+# Copies the large Rust source files found in the repositories of the 'rust-lang' and
+# 'rust-lang-nursery' GitHub organizations into a destination folder. Needs curl, jq and git.
+# Written for plain POSIX sh: no bash-only substitutions or arithmetic commands.
+
+# Usage info
+if ! [ $# = 1 ]
+then
+  echo "This script gets all of the (> 1000 LOC) source files in repositories"
+  echo "under 'rust-lang' and 'rust-lang-nursery' organizations"
+  echo ""
+  echo "Expected usage:"
+  echo "                          $0 <destination-folder>"
+  echo ""
+  echo "You probably want to run:"
+  echo "                          $0 sample-sources"
+  exit 1
+else
+  DEST="$1"
+fi
+
+# Make sure the destination exists and refer to it by absolute path, so that it stays valid after
+# changing directory (and so that absolute destinations work too)
+mkdir -p "$DEST" || exit 1
+DEST=$(cd "$DEST" && pwd) || exit 1
+
+# Work inside a temporary directory. Stop if it cannot be created (for instance because it already
+# exists) or entered: everything below, including the final 'rm -rf', assumes we own it.
+TEMP=temp
+mkdir "$TEMP" || exit 1
+cd "$TEMP" || exit 1
+
+# Get the JSON files
+# NOTE: the GitHub API paginates this listing, so only the first page of each organization is used
+curl https://api.github.com/orgs/rust-lang/repos > rust-lang.json
+curl https://api.github.com/orgs/rust-lang-nursery/repos > rust-lang-nursery.json
+
+# Make one big JSON array of repos and extract the name and clone url (two output lines per repo)
+jq -rs '.[0] + .[1] | .[] | (.name, .clone_url)' rust-lang.json rust-lang-nursery.json |
+while read -r REPO_NAME && read -r REPO_CLONE; do
+
+  # Skip 'multirust-rs-binaries' and 'rustc-timing-archive' in particular
+  case "$REPO_NAME" in
+    multirust-rs-binaries|rustc-timing-archive) continue ;;
+  esac
+
+  # Do a shallow clone of the repo (skip the repo if that fails)
+  echo "Cloning $REPO_NAME at $REPO_CLONE"
+  git clone --depth=1 "$REPO_CLONE" "$REPO_NAME" || continue
+
+  # Find all rust files in the repo and copy each of these files to the DEST folder, provided they
+  # are more than 1000 lines long. The 1000 line stipulation serves several purposes: to provide
+  # files whose parsing time is non-trivial and also source files which are expected to compile.
+  echo "Finding rust files in $REPO_NAME"
+  find "$REPO_NAME" -type f -name '*.rs' | while IFS= read -r FILE; do
+
+    # Escaped file name: the path inside the repo with every '/' replaced by '|'
+    DEST_FILE="$DEST/$(printf '%s' "$FILE" | tr '/' '|')"
+
+    # Check the file is longer than 1000 lines (some 'wc' implementations pad with spaces)
+    LINE_COUNT=$(wc -l < "$FILE" | tr -d ' ')
+    if [ "$LINE_COUNT" -gt 1000 ]
+    then
+      cp "$FILE" "$DEST_FILE"
+    fi
+
+  done
+
+  # Delete the cloned repo
+  rm -rf "$REPO_NAME"
+
+done
+
+# Clean up
+cd ..
+rm -rf "$TEMP"
+
@@ -78,7 +78,7 @@ library
 
 
 test-suite unit-tests
-  hs-source-dirs:      tests/unit-tests
+  hs-source-dirs:      test/unit-tests
   ghc-options:         -Wall
   main-is:             Main.hs
   other-modules:       LexerTest
@@ -95,7 +95,7 @@ test-suite unit-tests
                      , language-rust
 
 test-suite rustc-tests
-  hs-source-dirs:      tests/rustc-tests
+  hs-source-dirs:      test/rustc-tests
   ghc-options:         -Wall
   main-is:             Main.hs
   other-modules:       Diff
@@ -113,9 +113,10 @@ test-suite rustc-tests
                      , text >=1.2.0
                      , unordered-containers >= 0.2.7
                      , language-rust
+                     , time >=1.2.0.0
 
 benchmark timing-benchmarks
-  hs-source-dirs:      benchmarks/timing-benchmarks
+  hs-source-dirs:      bench/timing-benchmarks
   ghc-options:         -Wall
   main-is:             Main.hs
   type:                exitcode-stdio-1.0
@@ -131,7 +132,7 @@ benchmark timing-benchmarks
                      , aeson >= 1.0.0.0
 
 benchmark allocation-benchmarks
-  hs-source-dirs:      benchmarks/allocation-benchmarks
+  hs-source-dirs:      bench/allocation-benchmarks
   ghc-options:         -Wall
   main-is:             Main.hs
   type:                exitcode-stdio-1.0
 
@@ -23,7 +23,7 @@ sourceFile :: SourceFile Span
 
 module Language.Rust.Parser (
   -- * Parsing
-  parse, parse', parseSourceFile', Parse(..), P, execParser, initPos, Span,
+  parse, parse', readSourceFile, Parse(..), P, execParser, initPos, Span,
   -- * Lexing
   lexToken, lexNonSpace, lexTokens, translateLit,
   -- * Input stream
@@ -56,8 +56,8 @@ parse' is = case execParser parser is initPos of
               Right x -> x
 
 -- | Given a path pointing to a Rust source file, read that file and parse it into a 'SourceFile'
-parseSourceFile' :: FilePath -> IO (SourceFile Span)
-parseSourceFile' fileName = parse' <$> readInputStream fileName
+readSourceFile :: FilePath -> IO (SourceFile Span)
+readSourceFile = fmap parse' . readInputStream
 
 -- | Exceptions that occur during parsing
 data ParseFail = ParseFail Position String deriving (Eq, Typeable)
@@ -74,7 +74,6 @@ class Parse a where
 
 instance Parse (Lit Span) where parser = parseLit
 instance Parse (Attribute Span) where parser = parseAttr
-instance Parse (Arg Span) where parser = parseArg
 instance Parse (Ty Span) where parser = parseTy 
 instance Parse (Pat Span) where parser = parsePat
 instance Parse (Expr Span) where parser = parseExpr