diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..1bfbe9544 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +# Standard ignores +*~ +*.pyc +\#*# +.#* +.*.swp diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..28d015ff0 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,14 @@ +language: c++ +before_install: + # http://about.travis-ci.org/docs/user/build-configuration/#Installing-Packages-Using-apt + - sudo add-apt-repository -y ppa:nschloe/trilinos-nightly + - sudo apt-get update -qq + - sudo apt-get install cmake-data cmake + - sudo apt-get install gfortran +install: + - mkdir build + - cd build/ + - cmake -DCMAKE_BUILD_TYPE:STRING=DEBUG -DTriBITSProj_ENABLE_Fortran:BOOL=ON ../ + - make +script: + - ctest diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..65cc439e6 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,63 @@ +################################################################################ +# # +# TriBITS # +# # +################################################################################ + + +IF (NOT TriBITS_PROCESSED_BASE_PROJECT AND NOT PACKAGE_NAME STREQUAL "TriBITS") + + # This CMakeLists.txt file is being processed as the TriBITSProj projects's + # base CMakeLists.txt file! (See comments at bottom of this file.) + + INCLUDE("${CMAKE_CURRENT_SOURCE_DIR}/ProjectName.cmake") + SET(${PROJECT_NAME}_VERSION 1.0) + PROJECT(${PROJECT_NAME} NONE) + + SET(${PROJECT_NAME}_TRIBITS_DIR "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH "") + INCLUDE("${${PROJECT_NAME}_TRIBITS_DIR}/TriBITS.cmake") + + CMAKE_MINIMUM_REQUIRED(VERSION ${TRIBITS_CMAKE_MINIMUM_REQUIRED}) + + # TriBITS is the only package so just enable it and test it! + SET(${PROJECT_NAME}_ENABLE_TriBITS ON) + SET(${PROJECT_NAME}_ENABLE_TESTS ON CACHE BOOL "Enable test by default.") + + SET(TriBITS_PROCESSED_BASE_PROJECT ON) + + TRIBITS_PROJECT() + +ELSE() + + # This CMakeLists.txt file is being processed as the TriBITS package file. 
+ + TRIBITS_PACKAGE(TriBITS) + + TRIBITS_ADD_TEST_DIRECTORIES( + python + package_arch + ) + + ASSERT_DEFINED(CMAKE_GENERATOR) + IF (CMAKE_GENERATOR STREQUAL "Unix Makefiles") + TRIBITS_ADD_TEST_DIRECTORIES( + ctest/UnitTests + doc/examples/UnitTests + ) + ENDIF() + + TRIBITS_PACKAGE_POSTPROCESS() + +ENDIF() + +# NOTE: In order to allow the `tribits` directory to be both a TriBITS package +# (for inclusion in other TriBITS projects) and to be a TriBITS project +# itself, you only have to put in a simple if statement in this top-level +# CMakeLists.txt file and need to use a different package binary directory in +# PackagesList.cmake. That is all! In fact, this `tribits` directory shows +# how the same directory can be used for a TriBITS package, a TriBITS +# repository, and a TriBITS project! However, you can't name the project the +# same as the package, otherwise, TriBITS would try to create two +# targets with the same name, such as ${PROJECT_NAME}_libs. Therefore a different +# name and different binary directory must be used for the TriBITS package. +# But that is all. diff --git a/Copyright.txt b/Copyright.txt new file mode 100644 index 000000000..70962ece0 --- /dev/null +++ b/Copyright.txt @@ -0,0 +1,38 @@ +# @HEADER +# ************************************************************************ +# +# TriBITS: Tribal Build, Integrate, and Test System +# Copyright 2013 Sandia Corporation +# +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# ************************************************************************ +# @HEADER diff --git a/PackagesList.cmake b/PackagesList.cmake new file mode 100644 index 000000000..a6eb2d533 --- /dev/null +++ b/PackagesList.cmake @@ -0,0 +1,6 @@ +TRIBITS_REPOSITORY_DEFINE_PACKAGES( + TriBITS . 
PT + ) + +# Must create subdir for binary dir for the TriBITS package +SET(TriBITS_SPECIFIED_BINARY_DIR tribits) diff --git a/ProjectName.cmake b/ProjectName.cmake new file mode 100644 index 000000000..fb07d61e1 --- /dev/null +++ b/ProjectName.cmake @@ -0,0 +1 @@ +SET(PROJECT_NAME TriBITSProj) diff --git a/README.md b/README.md deleted file mode 100644 index 799e46017..000000000 --- a/README.md +++ /dev/null @@ -1,4 +0,0 @@ -TriBITS -======= - -TriBITS: Tribal Build, Integrate, and Test System diff --git a/README.rst b/README.rst new file mode 100644 index 000000000..504757bc1 --- /dev/null +++ b/README.rst @@ -0,0 +1,22 @@ +================================================= +TriBITS: Tribal Build, Integrate, and Test System +================================================= + +The Tribal Build, Integrate, and Test System (TriBITS) is a framework designed +to handle large software development projects involving multiple independent +development teams and multiple source repositories which is built on top of +the open-source CMake set of tools. TriBITS also defines a complete software +development, testing, and deployment system supporting processes consistent + +Documentation +============= + +* See `Version Controlled TriBITS Documentation Index on Github + `_ + (works on github or takes you to github) + +* See `Version Controlled TriBITS Documentation Index locally + `_ (local directory only, link does not work on github) + +.. ToDo: Provide a very short quickstart here to help people get going right +.. away! diff --git a/ReleaseNotes.txt b/ReleaseNotes.txt new file mode 100644 index 000000000..e8d916d46 --- /dev/null +++ b/ReleaseNotes.txt @@ -0,0 +1,57 @@ + +---------------------------------------- +Release Notes for TriBITS +---------------------------------------- + +2014/09/22: + +(*) Changed minimum version of CMake from 2.7 to 2.8.11. 
+ +2014/09/21: + +(*) Added support for the env var TRIBITS_TDD_USE_SYSTEM_CTEST so that if +equal to 1, then the TriBITS Dashboard Driver (TDD) system will use the CTest +(and CMake) in the env instead of downloading them using +download-cmake.py. This not only speeds up the automated builds, but it also +ensures that the automated testing uses exactly the install of CMake/CTest +that is used by the developers on the system. Also, it has been found that +download-cmake.py will download and install a 32bit version even on 64bit +machines. + +Trilinos 11.7: +-------------- + +(*) Switched from the terms Primary Stable (PS) and Secondary Stable (SS) code +to Primary Tested (PT) and Secondary Tested (ST) according to the plan in the +TriBITS Lifecycle model. Using 'PS' and 'SS' is still allowed but is +deprecated. This also included deprecating the variable +_ENABLE_SECONDARY_STABLE_CODE and replacing it with +_ENABLE_SECONDARY_TEST_CODE. Again, backward compatibility is +preserved. Also, the checkin-test.py arg --ss-extra-builds is deprecated and +replaced with --st-extra-builds. + + +Trilinos 11.6: +-------------- + +(*) Changed behavior of _ENABLE_=ON to enable all +subpackages for that package including in propagating forward dependencies. +See updated BuildQuickRef.* document. + + +Trilinos 11.3: +-------------- + +(*) Added ENVIRONMENT env1=val1 env2=val2 ... argument to TRIBITS_ADD_TEST(), +TRIBITS_ADD_ADVANCED_TEST(), and TRIBITS_ADD_EXECUTABLE_AND_TEST(). + +(*) Fixed the generation of headers for explicit instantiation system for +subpackages: Now subpackages that use the macro +TRIBITS_CREATE_CLIENT_TEMPLATE_HEADERS() to generate XXX.hpp header files with +or without explicit instantiation will key off of the parent package's explicit +instantiation setting. 
In addition, packages that use the macro +TRIBITS_CREATE_CLIENT_TEMPLATE_HEADERS() will also need to add a call to +TRIBITS_ADD_EXPLICIT_INSTANTIATION_OPTION() in their top-level CMakeLists.txt +file. + + diff --git a/TPLsList.cmake b/TPLsList.cmake new file mode 100644 index 000000000..0e2356b38 --- /dev/null +++ b/TPLsList.cmake @@ -0,0 +1,3 @@ +TRIBITS_REPOSITORY_DEFINE_TPLS( + MPI "${${PROJECT_NAME}_TRIBITS_DIR}/tpls/" PT + ) diff --git a/TriBITS.cmake b/TriBITS.cmake new file mode 100644 index 000000000..24f4269ef --- /dev/null +++ b/TriBITS.cmake @@ -0,0 +1,21 @@ +# +# Top-level include file that pulls in TriBITS so it can be used by a project. +# A Project's top-level CMakeLists.cmake file just does: +# +# INCLUDE(${${PROJECT_NAME}_TRIBITS_DIR}/TriBITS.cmake) +# +# and then they can call: +# +# TRIBITS_PROJECT() +# + +IF (${PROJECT_NAME}_TRIBITS_DIR) + SET(TRIBITS_BASE_DIR_LOCAL "${${PROJECT_NAME}_TRIBITS_DIR}") +ELSEIF(CMAKE_CURRENT_LIST_DIR) + SET(TRIBITS_BASE_DIR_LOCAL "${CMAKE_CURRENT_LIST_DIR}") +ELSE() + MESSAGE(FATAL_ERROR "Please set ${PROJECT_NAME}_TRIBITS_DIR" + " or use a CMake version older than 2.8.5!") +ENDIF() + +INCLUDE("${TRIBITS_BASE_DIR_LOCAL}/package_arch/TribitsProject.cmake") diff --git a/Tribits_version.h.in b/Tribits_version.h.in new file mode 100644 index 000000000..b3b535e8b --- /dev/null +++ b/Tribits_version.h.in @@ -0,0 +1,87 @@ +/* +@REPOSITORY_COPYRIGHT_HEADER@ +*/ + +#ifndef @REPOSITORY_NAME_UC@_VERSION_H +#define @REPOSITORY_NAME_UC@_VERSION_H + + +/* @REPOSITORY_NAME@ version numbering convention. + * + * @REPOSITORY_NAME@ version numbers take the form X.Y.Z where: + * + * X: The major version number that defines a window of (perfect) backward + * compatibility (see below). + * + * Y: The release version number within a backward-compatible set of + * versions X. Even numbers (0, 2, 4, ...) are used for releases and odd + * numbers (1, 3, 5, ...) are used for development versions in-between + * releases. 
+ * + * Z: The minor release version number for minor releases taken off of a + * release branch X.Y. Even numbers (0, 2, 4, ...) are used for customer + * releases and odd numbers (1, 3, 5, ...) are used for the code on the + * release X.Y branch in-between minor releases. + * + * All @REPOSITORY_NAME@ releases (i.e. X.Y where Y is even) are taken off of the + * development branch (i.e. the dev version X-1.R or X.Y-1) and are given a + * name containing the version number X.Y. The initial releases in a backward + * compatible set are then given the release numbers: + * + * X.0.0, X.2.0, X.4.0, ... + * + * The intermediate development versions are given the release numbers: + * + * X.1.0, X.3.0, X.5.0, .... + * + * For development versions, the minor release version number Z is always 0. + * + * The minor releases for a given release branch X.Y are given the version + * numbers: + * + * X.Y.0, X.Y.2, X.Y.4, ... + * + * The version numbers given to the code in the release branch X.Y in-between + * minor releases (which are not branched, only tagged) are: + * + * X.Y.1, X.Y.3, X.Y.5, ... + * + * In this way, client code can just examine the version number in this file + * and know exactly what version of @REPOSITORY_NAME@ they are working with, with no + * ambiguity no matter what. + */ + + +/* The major version number xx (allows up to 99 major @REPOSITORY_NAME@ release version + * numbers). + * + * The major @REPOSITORY_NAME@ version number defines a window of backward + * compatibility. + */ +#define @REPOSITORY_NAME_UC@_MAJOR_VERSION @REPOSITORY_MAJOR_VERSION@ + +/* The major, release, and minor release version numbers (i.e. xx.yy.zz). +* +* NOTE: When numbers are less than 10, it is padded with a 0. For example, +* development version 10.1 of @REPOSITORY_NAME@ is designated 100100 and the release +* version 10.2.4 is designated 100204. This preserves the comparability of +* these version numbers with simple comparison operators used in #ifdef tests. 
+*/ +#define @REPOSITORY_NAME_UC@_MAJOR_MINOR_VERSION @REPOSITORY_MAJOR_MINOR_VERSION@ + +/* NOTE: These macros are given long int values to allow comparisons in + * preprocessor #if statements. For example, you can do comparisons with ==, + * <, <=, >, and >=. + * + * NOTE: The C++ standard for the C preprocessor requires that the arguments + * for #if must be convertible into a long int. Expressions that convert to 1 + * are true and expressions that convert to 0 are false. + */ + +/* \brief Version string for @REPOSITORY_NAME@. + * + * NOTE: This string is to be used for outputting, not for comparison logic. + */ +#define @REPOSITORY_NAME_UC@_VERSION_STRING "@REPOSITORY_VERSION_STRING@" + +#endif /* @REPOSITORY_NAME_UC@_VERSION_H */ diff --git a/checkin-test.py b/checkin-test.py new file mode 100755 index 000000000..623835b29 --- /dev/null +++ b/checkin-test.py @@ -0,0 +1,1361 @@ +#!/usr/bin/env python + +# @HEADER +# ************************************************************************ +# +# TriBITS: Tribal Build, Integrate, and Test System +# Copyright 2013 Sandia Corporation +# +# Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +# the U.S. Government retains certain rights in this software. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# ************************************************************************ +# @HEADER + +# +# Imports +# + +import os +import sys +import traceback +from optparse import OptionParser + +if os.environ.get("TRIBITS_CHECKIN_TEST_DEBUG_DUMP", "") == "ON": + debugDump = True +else: + debugDump = False + +if debugDump: + print "NOTE: TRIBITS_CHECKIN_TEST_DEBUG_DUMP=ON set in env, doing debug dump ..." + +thisFilePath = __file__ +if debugDump: print "\nthisFilePath =", thisFilePath + +thisFileRealAbsBasePath = os.path.dirname(os.path.abspath(os.path.realpath(thisFilePath))) +if debugDump: print "\nthisFileRealAbsBasePath = '"+thisFileRealAbsBasePath+"'" + +sys.path = [os.path.join(thisFileRealAbsBasePath, 'python')] + sys.path +if debugDump: print "\nsys.path =", sys.path + +from CheckinTest import * +from GeneralScriptSupport import * + +# +# Utility classes and functions. +# + +usageHelp = r"""checkin-test.py [OPTIONS] + +This tool does testing of a TriBITS-based project using CTest and this script +can actually do the push itself using eg/git in a safe way. 
In fact, it is +recommended that one uses this script to push since it will amend the last +commit message with a (minimal) summary of the builds and tests run with +results and/or send out a summary email about the builds/tests performed. + + +Quickstart: +----------- + +In order to do a safe push, perform the following recommended workflow +(different variations on this workflow are described below): + +1) Commit changes in the local repo: + + # 1.a) See what files are changed, newly added, etc. that need to be committed + # or stashed. + $ eg status + + # 1.b) Stage the files you want to commit (optional) + $ eg stage + + # 1.c) Create your local commits + $ eg commit -- SOMETHING + $ eg commit -- SOMETHING_ELSE + ... + + # 1.d) Stash whatever changes are left you don't want to test/push (optional) + $ eg stash + + NOTE: You can group your commits any way that you would like (see the basic + eg/git documentation). + + NOTE: If not installed on your system, the eg script can be found at + tribits/common_tools/git/eg. Just add it to your path. + + NOTE: When multiple repos are involved, use egdist instead. It is provided + at tribits/common_tools/git/egdist. See egdist --help for details. + +2) Review the changes that you have made to make sure it is safe to push: + + $ cd $PROJECT_HOME + $ eg local-stat # Look at the full status of local repo + $ eg diff --name-status origin # [Optional] Look at the files that have changed + + NOTE: The command 'local-stat' is a git alias that can be installed with the + script tribits/common_tools/git/git-config-alias.sh. It is highly + recommended over just a raw 'eg status' or 'eg log' to review commits before + attempting to test/push commits. + + NOTE: If you see any files/directories that are listed as 'unknown' returned + from 'eg local-stat', then you will need to do an 'eg add' to track them or + add them to an ignore list *before* you run the checkin-test.py script. 
+ The eg script will not allow you to push if there are new 'unknown' files or + uncommitted changes to tracked files. + +3) Set up the checkin base build directory (first time only): + + $ cd $PROJECT_HOME + $ echo CHECKIN >> .git/info/exclude + $ mkdir CHECKIN + $ cd CHECKIN + + NOTE: You may need to set up some configuration files if CMake can not find + the right compilers, MPI, and TPLs by default (see detailed documentation + below). + + NOTE: You might want to set up a simple shell driver script. + + NOTE: You can set up a CHECKIN directory of any name in any location + you want. If you create one outside of the main source dir, then + you will not have to add the git exclude shown above. + +4) Do the pull, configure, build, test, and push: + + $ cd $PROJECT_HOME + $ cd CHECKIN + $ ../checkin-test.py -j4 --do-all --push + + NOTE: The above will: a) pull updates from the global repo(s), b) + automatically enable the correct packages, c) configure and build the right + packages, d) run the tests, e) send you emails about what happened, f) do a + final pull from the global repo, g) optionally amend the last local commit + with the test results, and h) finally push local commits to the global + repo(s) if everything passes. + + NOTE: You must have installed the official versions of eg/git with the + install-git.py script in order to run this script. If you don't, the script + will die right away with an error message telling you what the problem is. + + NOTE: The current branch will be used to pull and push to. A raw 'eg pull' + is performed which will get all of the branches from 'origin'. This means + that your current branch must be a tracking branch so that it will get + updated correctly. The branch 'master' is the most common branch but + release tracking branches are also common. 
+ + NOTE: You must not have any uncommitted changes or the 'eg pull && eg rebase + --against origin' command will fail on the final pull/rebase before the push + and therefore the whole script will fail. To run the script, you will may + need to first use 'eg stash' to stash away your unstaged/uncommitted changes + *before* running this script. + + NOTE: You need to have SSH public/private keys set up to the remote repo + machines for the git commands invoked in the script to work without you + having to type a password. + + NOTE: You can do the final push in a second invocation of the script with a + follow-up run with --push and removing --do-all (it will remember the + results from the build/test cases just ran). For more details, see detailed + documentation below. + + NOTE: Once you start running the checkin-test.py script, you can go off and + do something else and just check your email to see if all the builds and + tests passed and if the push happened or not. + + NOTE: The commands 'cmake', 'ctest', and 'make' must be in your default path + before running this script. + +For more details on using this script, see the detailed documentation below. + + +Detailed Documentation: +----------------------- + +The following approximate steps are performed by this script: + + +---------------------------------------------------------------------------- + + +1) Check to see if the local repo(s) are clean: + + $ eg status + + NOTE: If any modified or any unknown files are shown, the process will be + aborted. The local repo(s) working directory must be clean and ready to + push *everything* that is not stashed away. + +2) Do a 'eg pull' to update the code (done if --pull or --do-all is set): + + NOTE: If not doing a pull, use --allow-no-pull or --local-do-all. + +3) Select the list of packages to enable forward/downstream based on the +package directories where there are changed files (or from a list of packages +passed in by the user). 
+ + NOTE: The automatic enable behavior can be overridden or modified using the + options --enable-all-packages, --enable-packages, --disable-packages, and/or + --no-enable-fwd-packages. + +4) For each build/test case (e.g. MPI_DEBUG, SERIAL_RELEASE, +extra builds specified with --extra-builds): + + 4.a) Configure a build directory in a standard way for all of + the packages that have changed and all of the packages that depend on these + packages forward/downstream. You can manually select which packages get + enabled (see the enable options above). (done if --configure, --do-all, or + --local-do-all is set.) + + 4.b) Build all configured code with 'make' (e.g. with -jN set through + -j or --make-options). (done if --build, --do-all, or --local-do-all is set.) + + 4.c) Run all BASIC tests for enabled packages. (done if --test, --do-all, + or --local-do-all is set.) + + 4.d) Analyze the results of the update, configure, build, and tests and send + email about results. (emails only sent out if --send-emails-to != "") + +5) Do final pull and rebase, append test results to last commit message, and +push (done if --push is set) + + 5.a) Do a final 'eg pull' (done if --pull or --do-all is set) + + 5.b) Do 'eg rebase --against origin/' (done if --pull or + --do-all is set and --rebase is set) + + NOTE: The final 'eg rebase --against origin/' is + required to avoid trivial merge commits that the global get repo + will reject on the push. + + 5.c) Amend commit message of the most recent commit with the summary of the + testing performed. (done if --append-test-results is set.) 
+ + 5.d) Push the local commits to the global repo (done if --push is set) + + +---------------------------------------------------------------------------- + + +The recommended way to use this script is to create a new base CHECKIN test +directory apart from your standard build directories such as with: + + $ $PROJECT_HOME + $ mkdir CHECKIN + $ echo CHECKIN >> .git/info/exclude + +The most basic way to do pre-push testing is with: + + $ cd CHECKIN + $ ../checkin-test.py --do-all [other options] + +If your MPI installation, other compilers, and standard TPLs (i.e. BLAS and +LAPACK) can be found automatically, then this is all you will need to do. +However, if the setup cannot be determined automatically, then you can add a +set of CMake variables that will get read in the files: + + COMMON.config + MPI_DEBUG.config + SERIAL_RELEASE.config + +(or whatever your standard --default-builds are). + +Actually, for built-in build/test cases, skeletons of these files will +automatically be written out with typical CMake cache variables (commented +out) that you would need to set out. Any CMake cache variables listed in +these files will be read into and passed on the configure line to 'cmake'. + +WARNING: Please do not add any extra CMake cache variables than what are +needed to get the Primary Tested (PT) --default-builds builds to work. Adding +other enables/disables will make the builds non-standard and can break these +PT builds. The goal of these configuration files is to allow you to specify +the minimum environment to find MPI, your compilers, and the required TPLs +(e.g. BLAS, LAPACK, etc.). If you need to fudge what packages are enabled, +please use the script arguments --enable-packages, --disable-packages, +--no-enable-fwd-packages, and/or --enable-all-packages to control this, not +the *.config files! + +WARNING: Please do not add any CMake cache variables in the *.config files +that will alter what packages or TPLs are enabled or what tests are run. 
+Actually, the script will not allow you to change TPL enables in these +standard *.config files because to do so deviates from a consistent build +configuration for Primary Tested (PT) Code. + +NOTE: All tentatively-enabled TPLs (e.g. Pthreads and BinUtils) are hard +disabled in order to avoid different behaviors between machines where they +would be enabled and machines where they would be disabled. + +NOTE: If you want to add extra build/test cases that do not conform to +the standard build/test configurations described above, then you need +to create extra builds with the --extra-builds and/or +--st-extra-builds options (see below). + +NOTE: Before running this script, you should first do an 'eg status' and 'eg +diff --name-status origin..' and examine what files are changed to make sure +you want to push what you have in your local working directory. Also, please +look out for unknown files that you may need to add to the git repository with +'eg add' or add to your ignores list. There cannot be any uncommitted changes +in the local repo before running this script. + +NOTE: You don't need to run this script if you have not changed any files that +affect the build or the tests. For example, if all you have changed are +documentation files, then you don't need to run this script before pushing +manually. + +NOTE: To see detailed debug-level information, set +TRIBITS_CHECKIN_TEST_DEBUG_DUMP=ON in the env before running this script. + + +Common Use Cases (examples): +---------------------------- + +(*) Basic full testing with integrating with global repo(s) without push: + + ../checkin-test.py --do-all + + NOTE: This will result in a set of emails getting sent to your email address + for the different configurations and an overall push readiness status email. + + NOTE: If everything passed, you can follow this up with a --push (see + below). 
+ +(*) Basic full testing with integrating with local repo and push: + + ../checkin-test.py --do-all --push + + NOTE: By default this will rebase your local commits and amend the last + commit with a short summary of test results. This is appropriate for + pushing commits that only exist in your local repo and are not shared with + any remote repo. + +(*) Push to global repo after a completed set of tests has finished: + + ../checkin-test.py [other options] --push + + NOTE: This will pick up the results for the last completed test runs with + [other options] and append the results of those tests to the log of the most + recent commit. + + NOTE: Take the action options for the prior run and replace --do-all with + --push but keep all of the rest of the options the same. For example, if + you did: + + ../checkin-test.py --enable-packages=Blah --default-builds=MPI_DEBUG --do-all + + then follow that up with: + + ../checkin-test.py --enable-packages=Blah --default-builds=MPI_DEBUG --push + + NOTE: This is a common use case when some tests are failing which aborted + the initial push but you determine it is okay to push anyway and do so with + --force-push. + +(*) Test only the packages modified and not the forward dependent packages: + + ../checkin-test.py --do-all --no-enable-fwd-packages + + NOTE: This is a safe thing to do when only tests in the modified packages + are changed and not library code. This can speed up the testing process and + is to be preferred over not running this script at all. It would be very + hard to make this script automatically determine if only test code has + changed because every package does not follow a set pattern for + tests and test code. + +(*) Run the most important default (e.g. 
MPI_DEBUG) build/test only: + + ../checkin-test.py --do-all --default-builds=MPI_DEBUG + +(*) The minimum acceptable testing when code has been changed: + + ../checkin-test.py \ + --do-all --enable-all-packages=off --no-enable-fwd-packages \ + --default-builds=MPI_DEBUG + + NOTE: This will do only an MPI DEBUG build and will only build and run the + tests for the packages that have directly been changed and not any forward + packages. Replace "MPI_DEBUG" with whatever your most important default + build is. + +(*) Test only a specific set of packages and no others: + + ../checkin-test.py \ + --enable-packages=,, --no-enable-fwd-packages \ + --do-all + + NOTE: This will override all logic in the script about which packages will + be enabled based on file changes and only the given packages will be + enabled. When there are tens of thousands of changed files and hundreds of + defined packages, this auto-detection algorithm can be very expensive! + + NOTE: You might also want to pass in --enable-all-packages=off in case the + script wants to enable all the packages (see the output in the + checkin-test.py log file for details) and you think it is not necessary to + do so. + + NOTE: Using these options is greatly preferred to not running this script at + all and should not be any more expensive than the testing you would already + do manually before a push. + +(*) Test changes locally without pulling updates: + + ../checkin-test.py --local-do-all + + NOTE: This will just configure, build, test, and send an email notification + without updating or changing the status of the local git repo in any way and + without any communication with the global repo. Hence, you can have + uncommitted changes and still run configure, build, test without having to + commit or having to stash changes. + + NOTE: This is not a sufficient level of testing in order to push the changes + to the global repo because you have not fully integrated your changes yet + with other developers. 
However, this would be a sufficient level of testing + in order to do a commit on the local machine and then pull to a remote + machine for further testing and a push (see below). + +(*) Adding extra build/test cases: + + Often you will be working on Secondary Tested (ST) Code or Experimental (EX) + Code and want to include the testing of this in your pre-push testing + process along with the standard --default-builds build/test cases which can + only include Primary Tested (PT) Code. In this case you can run with: + + ../checkin-test.py --extra-builds=,,... [other options] + + For example, if you have a build that enables the TPL CUDA you would do: + + echo " + -DTPL_ENABLE_MPI:BOOL=ON + -DTPL_ENABLE_CUDA:BOOL=ON + " > MPI_DEBUG_CUDA.config + + and then run with: + + ../checkin-test.py --extra-builds=MPI_DEBUG_CUDA --do-all + + This will do the standard --default-builds (e.g. MPI_DEBUG and + SERIAL_RELEASE) build/test cases along with your non-standard MPI_DEBUG_CUDA + build/test case. + + NOTE: You can disable the default build/test cases with --default-builds="". + However, please only do this when you are not going to push because you need + at least one default build/test case (the most important default PT case, + e.g. MPI_DEBUG) to do a safe push. + +(*) Including extra repos and extra packages: + + You can also use the checkin-test.py script to continuously integrate + multiple git repos containing add-on packages. To do so, just run: + + ../checkin-test.py --extra-repos=,,... [options] + + NOTE: You have to create local commits in all of the extra repos where there + are changes or the script will abort. + + NOTE: Extra repos can be specified with more flexibility using the + --extra-repos-file and --extra-repos-type arguments (also see + --ignore-missing-extra-repos). + + NOTE: Each of the last local commits in each of the changed repos will get + amended with the appended summary of what was enabled in the build/test (if + --append-test-results is set). 
+ +(*) Avoid changing any of the local commit SHA1s: + + If you are pushing commits from a shared branch, it is critical that you do + not change any of the SHA1s of the commits. Changing the SHA1s for any of + the commits will mess up various multi-repo, multi-branch workflows. To + avoid changing any of the SHA1s of the local commits, one must run with: + + ../checkin-test.py --no-rebase --no-append-test-results [options] + +(*) Performing a remote test/push: + + If you develop on a slow machine like your laptop, doing an appropriate + level of testing can take a long time. In this case, you can pull the + changes to another faster remote workstation and do a more complete set of + tests and push from there. If you are knowledgeable with git, this will be + easy and natural to do, without any help from this script. However, this + script can still help and automate the steps and can do so in one command + invocation on the part of the developer. + + On your slow local development machine 'mymachine', do the limited testing + with: + + ../checkin-test.py --do-all --no-enable-fwd-packages + + On your fast remote test machine, do a full test and push with: + + ../checkin-test.py \ + --extra-pull-from=:master \ + --do-all --push + + where is a git repo pointing to + mymachine:/some/dir/to/your/src:master (see 'git help remote'). + + NOTE: You can of course adjust the packages and/or build/test cases that get + enabled on the different machines. + + NOTE: Once you invoke the checkin-test.py script on the remote test machine + and it has pulled the commits from mymachine, then you can start changing + files again on your local development machine and just check your email to + see what happens on the remote test machine. + + NOTE: If something goes wrong on the remote test machine, you can either + work on fixing the problem there or you can fix the problem on your local + development machine and then do the process over again. 
+ + NOTE: If you alter the commits on the remote machine (such as squashing + commits), you will have trouble merging back on your local machine. + Therefore, if you have to fix problems, make new commits and don't alter + the ones you pulled from your local machine (but rebasing them should be + okay as long as the local commits on mymachine are not pushed to other + repos). + + NOTE: Git will resolve the duplicated commits when you pull the commits + pushed from the remote machine. Git knows that the commits are the same and + will do the right thing when rebasing (or just merging). + +(*) Check push readiness status: + + ../checkin-test.py + + NOTE: This will examine results for the last testing process and send out an + email stating if a push is ready to perform or not. + +(*) See the default option values without doing anything: + + ../checkin-test.py --show-defaults + + NOTE: This is the easiest way to figure out what all of the default options + are. + +Hopefully the above documentation, the example use cases, the documentation of +the command-line arguments below, and some experimentation will be enough to +get you going using this script for all of your pre-push testing and pushes. +If that is not sufficient, send email to your development support team to ask +for help. + + +Handling of PT, ST, and EX Code in built-in and extra builds: +------------------------------------------------------------- + +This script will only process PT (Primary Tested) packages in the +--default-builds (e.g. MPI_DEBUG and SERIAL_RELEASE) builds. This is to avoid +problems of side-effects of turning on ST packages that would impact PT +packages (e.g. an ST package getting enabled that enables an ST TPL which +turns on support for that TPL in a PT package producing different code which +might work but the pure PT build without the extra TPL may actually be broken +and not know it). 
Therefore, any non-PT packages that are enabled (either +implicitly through changed files or explicitly by listing in --enable-packages) +will be turned off in the --default-builds builds. If none of the enabled +packages are PT, then they will all be disabled and the --default-builds +builds will be skipped. + +In order to better support the development of ST and EX packages, this script +allows you to define some extra builds that will be invoked and used to +determine overall pass/fail before a potential push. The option +--st-extra-builds is used to specify extra builds that will test ST packages +(and also PT packages if any are enabled). If only PT packages are enabled +then the builds specified in --st-extra-builds will still be run. The +reasoning is that PT packages may contain extra ST features and therefore if +the goal is to test these ST builds it is desirable to also run these builds +because they also may impact downstream ST packages. + +Finally, the option --extra-builds will test all enabled packages, including +EX packages, regardless of their test group. Therefore, when using +--extra-builds, be careful that you watch what packages are enabled. If you +change an EX package, it will be enabled in --extra-builds builds. + +A few use cases might help better demonstrate the behavior. Consider +the following input arguments specifying extra builds + + --st-extra-builds=MPI_DEBUG_ST --extra-builds=INTEL_DEBUG + +with the packages Teuchos, Phalanx, and Meros where Teuchos is PT, Phalanx is +ST, and Meros is EX. + +Here is what packages would be enabled in each of the builds: + + --default-builds=MPI_DEBUG,SERIAL_RELEASE \ + --st-extra-builds=MPI_DEBUG_ST \ + --extra-builds=INTEL_DEBUG + +and which packages would be excluded: + +A) --enable-packages=Teuchos: + MPI_DEBUG: [Teuchos] + SERIAL_RELEASE: [Teuchos] + MPI_DEBUG_ST: [Teuchos] + INTEL_DEBUG: [Teuchos] Always enabled! + +B) --enable-packages=Phalanx: + MPI_DEBUG: [] Skipped, no PT packages! 
+ SERIAL_RELEASE: [] Skipped, no PT packages! + MPI_DEBUG_ST: [Phalanx] + INTEL_DEBUG: [Phalanx] + +C) --enable-packages=Meros: + MPI_DEBUG: [] Skipped, no PT packages! + SERIAL_RELEASE: [] Skipped, no PT packages! + MPI_DEBUG_ST: [] Skipped, no PT or ST packages! + INTEL_DEBUG: [Meros] + +D) --enable-packages=Teuchos,Phalanx: + MPI_DEBUG: [Teuchos] + SERIAL_RELEASE: [Teuchos] + MPI_DEBUG_ST: [Teuchos,Phalanx] + INTEL_DEBUG: [Teuchos,Phalanx] + +E) --enable-packages=Teuchos,Phalanx,Meros: + MPI_DEBUG: [Teuchos] + SERIAL_RELEASE: [Teuchos] + MPI_DEBUG_ST: [Teuchos,Phalanx] + INTEL_DEBUG: [Teuchos,Phalanx,Meros] + +The --extra-builds=INTEL_DEBUG build is always performed with all of the +enabled packages. The logic given above must be understood in order to +understand the output given in the script. + + +Conventions for Command-Line Arguments: +--------------------------------------- + +The command-line arguments are segregated into three broad categories: a) +action commands, b) aggregate action commands, and c) others. + +a) The action commands are those such as --build, --test, etc. and are shown +with [ACTION] in their documentation. These action commands have no off +complement. If the action command appears, then the action will be performed. + +b) Aggregate action commands such as --do-all and --local-do-all turn on sets +of other action commands and are shown with [AGGR ACTION] in their +documentation. The sub-actions that these aggregate action commands turn on +cannot be disabled with other arguments. + +c) Other arguments are those that are not marked with [ACTION] or [AGGR +ACTION] and tend to either pass in data or turn control flags on or off. + + +Exit Code: +--------- + +This script returns 0 if the actions requested are successful. This does not +necessarily imply that it is okay to do a push. For example, if only --pull +is passed in and is successful, then 0 will be returned but that does *not* +mean that it is okay to do a push. 
A 0 return value is a necessary but not +sufficient condition for readiness to push. + +""" + +# ToDo: Break up the above huge documention block into different "topics" and +# then display those topics with --help-topic=. Also provide a +# --help-all that will combine all of the --help-topic documentation with the +# standard documentation to produce where is there now. + +def runProjectTestsWithCommandLineArgs(commandLineArgs, configuration = {}): + + clp = ConfigurableOptionParser(configuration.get('defaults', {}), usage=usageHelp) + + clp.add_option( + "--project-configuration", dest="projectConfiguration", type="string", default="", + help="Custom file to provide configuration defaults for the project." \ + + " By default, the file project-checkin-test-config.py is looked for" \ + + " in /../.. (assuming default /cmake/tribits/" \ + + " directory structure and second is looked for in / (which" \ + + " is common practice to symlink the checkin-test.py script into the project's" \ + + " base directory). If this file is set to a location that is not in the" \ + + " project's base directory, then --src-dir must be set to point to the" \ + + " project's base directory." + ) + + clp.add_option( + "--show-defaults", dest="showDefaults", action="store_true", + help="Show the default option values and do nothing at all.", + default=False ) + + clp.add_option( + "--project-name", dest="projectName", action="store", + help="Set the project's name. This is used to locate various files.", + default=None) + + clp.add_option( + "--eg-git-version-check", dest="enableEgGitVersionCheck", action="store_true", + help="Enable automatic check for the right versions of eg and git. 
[default]" ) + clp.add_option( + "--no-eg-git-version-check", dest="enableEgGitVersionCheck", action="store_false", + help="Do not check the versions of eg and git, just trust they are okay.", + default=True ) + + clp.add_option( + '--src-dir', dest="srcDir", type="string", default="", + help="The source base directory for code to be tested. The default is determined" \ + +" by the location of the found project-checkin-test-config.py file." ) + + configuredBuilds = [build for build, unused in + configuration.get('cmake', {}).get('default-builds', [])] + clp.add_option( + '--default-builds', dest='defaultBuilds', type='string', + default=','.join(configuredBuilds), + help="Comma separated list of builds that should always be run by default.") + + clp.add_option( + "--extra-repos-file", dest="extraReposFile", type="string", default="", + help="File path to an extra repositories list file. If set to 'project', then " \ + +"/cmake/ExtraRepositoriesList.cmake is read. See the argument " \ + +"--extra-repos for details on how this list is used (default empty '')") + + g_extraRepoTypesList = [""] + g_extraRepoTypesList.extend(g_knownTribitsTestRepoTypes) + + addOptionParserChoiceOption( + "--extra-repos-type", "extraReposType", g_extraRepoTypesList, 0, + "The test type of repos to read from .", + clp ) + + clp.add_option( + "--extra-repos", dest="extraRepos", type="string", default="", + help="List of comma separated extra repositories " \ + +"containing extra packages that can be enabled. The order these repos is " + +"listed in not important. This option overrides --extra-repos-file.") + + clp.add_option( + "--ignore-missing-extra-repos", dest="ignoreMissingExtraRepos", action="store_true", + help="If set, then extra repos read in from will be ignored " \ + +"and removed from list. This option is not applicable if =='' " \ + +"or ==''." 
) + clp.add_option( + "--require-extra-repos-exist", dest="ignoreMissingExtraRepos", action="store_false", + default=False, + help="If set, then all listed extra repos must exist or the script will exit. [default]" ) + + clp.add_option( + "--with-cmake", dest="withCmake", type="string", default="cmake", + help="CMake executable to use with cmake -P scripts internally (only set" \ + +" by unit testing code).") + + clp.add_option( + "--skip-deps-update", dest="skipDepsUpdate", action="store_true", + help="If set, skip the update of the dependency XML file. If the package structure" \ + " has not changed since the last invocation, then it is safe to use this option.", + default=False ) + + clp.add_option( + "--enable-packages", dest="enablePackages", type="string", default="", + help="List of comma separated packages to test changes for" \ + +" (example, 'Teuchos,Epetra'). If this list of packages is empty, then" \ + +" the list of packages to enable will be determined automatically by examining" \ + +" the set of modified files from the version control update log. Note that"\ + +" this will skip the auto-detection of changed packages based on changed"\ + +" files." ) + + clp.add_option( + "--disable-packages", dest="disablePackages", type="string", default="", + help="List of comma separated packages to explicitly disable" \ + +" (example, 'Tpetra,NOX'). This list of disables will be appended after" \ + +" all of the listed enables no mater how they are determined (see" \ + +" --enable-packages option). NOTE: Only use this option to remove packages" \ + +" that will not build for some reason. You can disable tests that run" \ + +" by using the CTest option -E passed through the --ctest-options argument" \ + +" in this script." ) + + addOptionParserChoiceOption( + "--enable-all-packages", "enableAllPackages", ('auto', 'on', 'off'), 0, + "Determine if all packages are enabled 'on', or 'off', or 'auto'" \ + +" (let other logic decide). 
Setting to 'off' is appropriate when" \ + +" the logic in this script determines that a global build file has changed" \ + +" but you know that you don't need to rebuild and test every package for" \ + +" a reasonable test. Setting --enable-packages effectively disables this" \ + +" option. Setting this to 'off' does *not* stop the forward enabling" \ + +" of downstream packages for packages that are modified or set by --enable-packages."\ + +" Setting this to 'on' will skip the automatic detection of changed packages"\ + +" based on changed files. It can be helpful to stop the auto-detection changed"\ + +" packages when there are thousands of changed files and hundreds of defined"\ + +" packages." , + clp ) + + clp.add_option( + "--enable-fwd-packages", dest="enableFwdPackages", action="store_true", + help="Enable forward packages. [default]" ) + clp.add_option( + "--no-enable-fwd-packages", dest="enableFwdPackages", action="store_false", + help="Do not enable forward packages.", default=True ) + + clp.add_option( + "--continue-if-no-updates", dest="abortGracefullyIfNoUpdates", action="store_false", + help="If set, then the script will continue if no updates are pulled from any repo. [default]", + default=False ) + clp.add_option( + "--abort-gracefully-if-no-updates", dest="abortGracefullyIfNoUpdates", action="store_true", + help="If set, then the script will abort gracefully if no updates are pulled from any repo.", + default=False ) + + clp.add_option( + "--continue-if-no-changes-to-push", dest="abortGracefullyIfNoChangesToPush", action="store_false", + help="If set, then the script will continue if no changes to push from any repo. 
[default]", + default=False ) + clp.add_option( + "--abort-gracefully-if-no-changes-to-push", dest="abortGracefullyIfNoChangesToPush", action="store_true", + help="If set, then the script will abort gracefully if no changes to push from any repo.", + default=False ) + + clp.add_option( + "--continue-if-no-enables", dest="abortGracefullyIfNoEnables", action="store_false", + help="If set, then the script will continue if no packages are enabled. [default]", + default=False ) + clp.add_option( + "--abort-gracefully-if-no-enables", dest="abortGracefullyIfNoEnables", action="store_true", + help="If set, then the script will abort gracefully if no packages are enabled.", + default=False ) + + clp.add_option( + "--extra-cmake-options", dest="extraCmakeOptions", type="string", + default=configuration.get('extra-cmake-options', ''), + help="Extra options to pass to 'cmake' after all other options." \ + +" This should be used only as a last resort. To disable packages, instead use" \ + +" --disable-packages. To change test categories, use --test-categories." ) + + clp.add_option( + "--test-categories", dest="testCategories", type="string", + default="BASIC", + help="." \ + +" Change the test categories. Can be 'BASIC', 'CONTINUOUS', " \ + " 'NIGHTLY', or 'WEEKLY' (default 'BASIC')." ) + + clp.add_option( + "-j", dest="overallNumProcs", type="string", default="", + help="The options to pass to make and ctest (e.g. -j4)." ) + + clp.add_option( + "--make-options", dest="makeOptions", type="string", default="", + help="The options to pass to make (e.g. -j4)." ) + + clp.add_option( + "--ctest-options", dest="ctestOptions", type="string", default="", + help="Extra options to pass to 'ctest' (e.g. -j2)." ) + + clp.add_option( + "--ctest-timeout", dest="ctestTimeOut", type="float", default=300, + help="timeout (in seconds) for each single 'ctest' test (e.g. 180" \ + +" for three minutes)." 
) + + clp.add_option( + "--show-all-tests", dest="showAllTests", action="store_true", + help="Show all of the tests in the summary email and in the commit message" \ + +" summary (see --append-test-results)." ) + clp.add_option( + "--no-show-all-tests", dest="showAllTests", action="store_false", + help="Don't show all of the test results in the summary email. [default]", + default=False ) + + clp.add_option( + "--without-default-builds", dest="withoutDefaultBuilds", action="store_true", + default=False, + help="Skip the default builds (same as --default-builds='')." \ + +" You would use option along with --extra-builds=BUILD1,BUILD2,... to run your own" \ + +" local custom builds." ) + + clp.add_option( + "--st-extra-builds", dest="stExtraBuilds", type="string", default="", + help="List of comma-separated ST extra build names. For each of the build names in" \ + +" --st-extra-builds=,,..., there must be a file .config in" \ + +" the local directory along side the COMMON.config file that defines the special" \ + +" build options for the extra build." ) + + clp.add_option( + "--ss-extra-builds", dest="ssExtraBuilds", type="string", default="", + help="DEPRECATED! Use --st-extra-builds instead!." ) + + clp.add_option( + "--extra-builds", dest="extraBuilds", type="string", default="", + help="List of comma-separated extra build names. For each of the build names in" \ + +" --extra-builds=,,..., there must be a file .config in" \ + +" the local directory along side the COMMON.config file that defines the special" \ + +" build options for the extra build." ) + + clp.add_option( + "--send-email-to", dest="sendEmailTo", type="string", + default=getCmndOutput("git config --get user.email", True, False), + help="List of comma-separated email addresses to send email notification to" \ + +" after every build/test case finishes and at the end for an overall summary" \ + +" and push status." 
\ + +" By default, this is the email address you set for git returned by" \ + +" `git config --get user.email`. In order to turn off email" \ + +" notification, just set --send-email-to='' and no email will be sent." ) + + clp.add_option( + "--skip-case-send-email", dest="skipCaseSendEmail", action="store_true", + help="If set then if a build/test case is skipped for some reason (i.e." \ + +" because no packages are enabled) then an email will go out for that case." \ + +" [default]" ) + clp.add_option( + "--skip-case-no-email", dest="skipCaseSendEmail", action="store_false", + help="If set then if a build/test case is skipped for some reason (i.e." \ + +" because no packages are enabled) then no email will go out for that case." \ + +" [default]", + default=True ) + + clp.add_option( + "--send-email-for-all", dest="sendEmailOnlyOnFailure", action="store_false", + help="If set, then emails will get sent out for all operations. [default]" ) + clp.add_option( + "--send-email-only-on-failure", dest="sendEmailOnlyOnFailure", action="store_true", + help="If set, then emails will only get sent out for failures.", + default=False ) + + clp.add_option( + "--send-email-to-on-push", dest="sendEmailToOnPush", type="string", + default=configuration.get('SendEmailOnPush', ''), + help="List of comma-separated email addresses to send email notification to" \ + +" on a successful push. This is used to log pushes to a central list." \ + +" In order to turn off this email" \ + +" notification, just set --send-email-to-on-push='' and no email will be sent" \ + +" to these email lists." ) + + clp.add_option( + "--force-push", dest="forcePush", action="store_true", + help="Force the local push even if there are build/test errors." \ + +" WARNING: Only do this when you are 100% certain that the errors are not" \ + +" caused by your code changes. 
This only applies when --push is specified" \ + +" and this script.") + clp.add_option( + "--no-force-push", dest="forcePush", action="store_false", default=False, + help="Do not force a push if there are failures. [default]" ) + + clp.add_option( + "--do-push-readiness-check", dest="doPushReadinessCheck", action="store_true", + help="Check the push readiness status at the end and send email if not actually" \ + +" pushing. [default]" ) + clp.add_option( + "--skip-push-readiness-check", dest="doPushReadinessCheck", action="store_false", + default=True, + help="Skip push status check." ) + + clp.add_option( + "--rebase", dest="rebase", action="store_true", + help="Rebase the local commits on top of origin/master before amending" \ + +" the last commit and pushing. Rebasing keeps a nice linear commit" \ + +" history like with CVS or SVN and will work perfectly for the basic" \ + +" workflow of adding commits to the 'master' branch and then syncing" \ + +" up with origin/master before the final push. [default]" ) + clp.add_option( + "--no-rebase", dest="rebase", action="store_false", + help="Do not rebase the local commits on top of origin/master before" \ + +" amending the final commit and pushing. This allows for some more " \ + +" complex workflows involving local branches with multiple merges." \ + +" However, this will result in non-linear history and will allow for" \ + +" trivial merge commits with origin/master to get pushed. This mode" \ + +" should only be used in cases where the rebase mode will not work or " \ + +" when it is desired to use a merge commit to integrate changes on a" \ + +" branch that you wish be able to easily back out. For sophisticated" \ + +" users of git, this may in fact be the preferred mode.", + default=True ) + + clp.add_option( + "--append-test-results", dest="appendTestResults", action="store_true", + help="Before the final push, amend the most recent local commit by appending a" \ + +" summary of the test results. 
This provides a record of what builds" \ + +" and tests were performed in order to test the local changes. This is only " \ + +" performed if --push is also set. NOTE: If the same" \ + +" local commit is amended more than once, the prior test summary sections will be" \ + +" overwritten with the most recent test results from the current run. [default]" ) + clp.add_option( + "--no-append-test-results", dest="appendTestResults", action="store_false", + help="Do not amend the last local commit with test results. NOTE: If you have" \ + +" uncommitted local changes that you do not want this script to commit then you" \ + +" must select this option to avoid this last amending commit. Also, if you are" \ + +" pushing commits from a shared branch and don't want to change any of the SHA1s" \ + +" for the commits, then you must set this option!", + default=True ) + + clp.add_option( + "--extra-pull-from", dest="extraPullFrom", type="string", default="", + help="Optional extra git pull ':' to merge in changes from after" \ + +" pulling in changes from 'origin'. This option uses a colon with no spaces in between" \ + +" :' to avoid issues with passing arguments with spaces." \ + +" For example --extra-pull-from=machine:/base/dir/repo:master." \ + +" This extra pull is only done if --pull is also specified. NOTE: when using" \ + +" --extra-repo=REPO1,REPO2,... the must be a named repository that is" \ + +" present in all of the git repos or it will be an error." ) + + clp.add_option( + "--allow-no-pull", dest="allowNoPull", action="store_true", default=False, + help="Allowing for there to be no pull performed and still doing the other actions." \ + +" This option is useful for testing against local changes without having to" \ + +" get the updates from the global repo. However, if you don't pull, you can't" \ + +" push your changes to the global repo. WARNING: This does *not* stop a pull" \ + +" attempt from being performed by --pull or --do-all!" 
) + + clp.add_option( + "--wipe-clean", dest="wipeClean", action="store_true", default=False, + help="[ACTION] Blow existing build directories and build/test results. The action can be" \ + +" performed on its own or with other actions in which case the wipe clean will be" \ + +" performed before any other actions. NOTE: This will only wipe clean the builds" \ + +" that are specified and will not touch those being ignored (e.g. SERIAL_RELEASE" \ + +" will not be removed if --default-builds=MPI_DEBUG is specified)." ) + + clp.add_option( + "--pull", dest="doPull", action="store_true", default=False, + help="[ACTION] Do the pull from the default (origin) repository and optionally also" \ + +" merge in changes from the repo pointed to by --extra-pull-from." ) + + clp.add_option( + "--configure", dest="doConfigure", action="store_true", default=False, + help="[ACTION] Do the configure step." ) + + clp.add_option( + "--build", dest="doBuild", action="store_true", default=False, + help="[ACTION] Do the build step." ) + + clp.add_option( + "--test", dest="doTest", action="store_true", default=False, + help="[ACTION] Do the running of the enabled tests." ) + + clp.add_option( + "--local-do-all", dest="localDoAll", action="store_true", default=False, + help="[AGGR ACTION] Do configure, build, and test with no pull (same as setting" \ + +" --allow-no-pull ---configure --build --test)." \ + +" This is the same as --do-all except it does not do --pull and also allows for no pull." ) + + clp.add_option( + "--do-all", dest="doAll", action="store_true", default=False, + help="[AGGR ACTION] Do update, configure, build, and test (same as --pull --configure" \ + +" --build --test). NOTE: This will do a --pull regardless if --allow-no-pull" \ + +" is set or not. To avoid the pull, use --local-do-all." 
) + + clp.add_option( + "--push", dest="doPush", action="store_true", default=False, + help="[ACTION] Push the committed changes in the local repo into to global repo" \ + +" 'origin' for the current branch. Note: If you have uncommitted changes this" \ + +" command will fail. Note: You must have SSH public/private keys set up with" \ + +" the origin machine (e.g. software.sandia.gov) for the push to happen without" \ + +" having to type your password." ) + + clp.add_option( + "--execute-on-ready-to-push", dest="executeOnReadyToPush", type="string", default="", + help="[ACTION] A command to execute on successful execution and 'READY TO PUSH'" \ + +" status from this script. This can be used to do a remote SSH invocation to a" \ + +" remote machine to do a remote pull/test/push after this machine finishes." ) + + (options, args) = clp.parse_args(args=commandLineArgs) + + # NOTE: Above, in the pairs of boolean options, the *last* add_option(...) + # takes effect! That is why the commands are ordered the way they are! 
+ + + # + # Echo the command-line + # + + print "" + print "**************************************************************************" + print "Script: checkin-test.py \\" + + if options.enableEgGitVersionCheck: + print " --eg-git-version-check \\" + else: + print " --no-eg-git-version-check \\" + print " --src-dir='" + options.srcDir+"' \\" + print " --default-builds='" + options.defaultBuilds + "' \\" + print " --extra-repos-file='"+options.extraReposFile+"' \\" + print " --extra-repos-type='"+options.extraReposType+"' \\" + print " --extra-repos='"+options.extraRepos+"' \\" + if options.ignoreMissingExtraRepos: + print " --ignore-missing-extra-repos \\" + else: + print " --require-extra-repos-exist \\" + if options.skipDepsUpdate: + print " --skip-deps-update \\" + print " --enable-packages='"+options.enablePackages+"' \\" + print " --disable-packages='"+options.disablePackages+"' \\" + print " --enable-all-packages='"+options.enableAllPackages+"'\\" + if options.enableFwdPackages: + print " --enable-fwd-packages \\" + else: + print " --no-enable-fwd-packages \\" + if options.abortGracefullyIfNoUpdates: + print " --abort-gracefully-if-no-updates \\" + else: + print " --continue-if-no-updates \\" + if options.abortGracefullyIfNoChangesToPush: + print " --abort-gracefully-if-no-changes-to-push \\" + else: + print " --continue-if-no-changes-to-push \\" + if options.abortGracefullyIfNoEnables: + print " --abort-gracefully-if-no-enables \\" + else: + print " --continue-if-no-enables \\" + print " --extra-cmake-options='"+options.extraCmakeOptions+"' \\" + print " --test-categories='"+options.testCategories+"' \\" + if options.overallNumProcs: + print " -j"+options.overallNumProcs+" \\" + print " --make-options='"+options.makeOptions+"' \\" + print " --ctest-options='"+options.ctestOptions+"' \\" + print " --ctest-timeout="+str(options.ctestTimeOut)+" \\" + if options.showAllTests: + print " --show-all-tests \\" + else: + print " --no-show-all-tests \\" + if 
options.withoutDefaultBuilds: + print " --without-default-builds \\" + print " --st-extra-builds='"+options.stExtraBuilds+"' \\" + print " --extra-builds='"+options.extraBuilds+"' \\" + print " --send-email-to='"+options.sendEmailTo+"' \\" + if options.skipCaseSendEmail: + print " --skip-case-send-email \\" + else: + print " --skip-case-no-email \\" + if not options.sendEmailOnlyOnFailure: + print " --send-email-for-all \\" + else: + print " --send-email-only-on-failure \\ " + print " --send-email-to-on-push='"+options.sendEmailToOnPush+"' \\" + if options.forcePush: + print " --force-push \\" + else: + print " --no-force-push \\" + if options.doPushReadinessCheck: + print " --do-push-readiness-check \\" + else: + print " --skip-push-readiness-check \\" + if options.rebase: + print " --rebase \\" + else: + print " --no-rebase \\" + if options.appendTestResults: + print " --append-test-results \\" + else: + print " --no-append-test-results \\" + if options.extraPullFrom: + print " --extra-pull-from='"+options.extraPullFrom+"' \\" + if options.allowNoPull: + print " --allow-no-pull \\" + if options.wipeClean: + print " --wipe-clean \\" + if options.doPull: + print " --pull \\" + if options.doConfigure: + print " --configure \\" + if options.doBuild: + print " --build \\" + if options.doTest: + print " --test \\" + if options.localDoAll: + print " --local-do-all \\" + if options.doAll: + print " --do-all \\" + if options.doPush: + print " --push \\" + if options.executeOnReadyToPush: + print " --execute-on-ready-to-push=("+options.executeOnReadyToPush+") \\" + if options.ssExtraBuilds: + print " --ss-extra-builds='"+options.ssExtraBuilds+"' \\" + print "\nWARNING: --ss-extra-builds is deprecated! Use --st-extra-builds instead!" + if options.stExtraBuilds: + print "ERROR: Can't set deprecated --ss-extra-builds and --st-extra-builds together!" 
+ sys.exit(3) + options.stExtraBuilds = options.ssExtraBuilds + + + # + # Check the input arguments + # + + if options.doAll and options.localDoAll: + print "\nError, you can not use --do-all and --local-do-all together! Use on or the other!" + sys.exit(1) + + if options.doAll and options.allowNoPull: + print "\nError, you can not use --do-all and --allow-no-pull together! (see the" \ + " documentation for the --do-all, --local-do-all, and --allow-no-pull arguments.)" + sys.exit(2) + + if options.extraPullFrom: + getRepoSpaceBranchFromOptionStr(options.extraPullFrom) # Will validate form + + + # + # Execute the checkin test guts + # + + import time + + if not options.showDefaults: + + print "\nStarting time:", getCmndOutput("date",True) + + baseDir = getCompleteFileDirname(__file__) + + t1 = time.time() + success = checkinTest(baseDir, options, configuration) + t2 = time.time() + print "\nTotal time for checkin-test.py =", formatMinutesStr((t2-t1)/60.0) + + print "\nFinal time:", getCmndOutput("date",True) + + if options.ssExtraBuilds: + print "\n***" + print "*** FINAL WARNING: stop using deprecated --ss-extra-builds! Use --st-extra-builds instead!" + print "***" + + if success: + print "\nREQUESTED ACTIONS: PASSED\n" + return True + else: + print "\nREQUESTED ACTIONS: FAILED\n" + return False + else: + return True + + +def getConfigurationSearchPaths(): + """ + Gets a list of paths to search for the configuration. If this file + was invoked from a symlink, look in the directory that contains the + symlink. The returned list will always contain at least one element. + """ + result = [] + + # Always look for the configuration file assuming the checkin-test.py script + # is run out of the standared snapshotted tribits directory + # /cmake/tribits/. + result.append(os.path.join(thisFileRealAbsBasePath, '..', '..')) + + # Lastly, look for the checkin-test.py file's base directory path. 
It is
+  # common practice to symbolically link the checkin-test.py script into the
+  # project's base source directory. NOTE: Don't use realpath here! We don't
+  # want to follow symbolic links!
+  result.append(os.path.dirname(os.path.abspath(__file__)))
+
+  return result
+
+
+def loadConfigurationFile(filepath):
+  # Import the Python module at 'filepath' and return its module-level
+  # 'configuration' dictionary.
+  #
+  # The module is imported by temporarily prepending its directory to
+  # sys.path; sys.path is always restored in the 'finally' below, even when
+  # the import raises.  (The nested try blocks are presumably for Python 2.4
+  # compatibility, where try/except/finally could not be combined -- TODO
+  # confirm against the project's minimum Python version.)
+  #
+  # Side effect: the loaded configuration's "defaults" dict gets "--src-dir"
+  # set to the directory containing the configuration file.
+  #
+  # Raises an Exception if 'filepath' does not exist, and re-raises (after
+  # printing) any exception thrown while importing the module.
+  if os.path.exists(filepath):
+    sys_path_old = sys.path
+    try:
+      modulePath = os.path.dirname(filepath)
+      moduleFile = os.path.basename(filepath)
+      moduleName, extension = os.path.splitext(moduleFile)
+      # Prepend (not append) so this configuration module shadows any
+      # same-named module elsewhere on sys.path.
+      sys.path = [modulePath] + sys_path_old
+      try:
+        if debugDump:
+          print "\nLoading project configuration from %s..." % filepath
+          print "\nsys.path =", sys.path
+        configuration = __import__(moduleName).configuration
+        if debugDump:
+          print "\nSetting the default --src-dir='"+modulePath+"'"
+        # Default --src-dir to where the configuration file itself lives.
+        configuration.get("defaults").update({"--src-dir" : modulePath})
+        return configuration
+      except Exception, e:
+        print e
+        raise e
+    finally:
+      # Always restore the caller's sys.path.
+      sys.path = sys_path_old
+      if debugDump:
+        print "\nsys.path =", sys.path
+  else:
+    raise Exception('The file %s does not exist.' % filepath)
+
+
+def locateAndLoadConfiguration(path_hints = []):
+  """
+  Locate and load a module called
+  checkin_test_project_configuration.py. The path_hints argument can
+  be used to provide location hints at which to locate the
+  file. Returns a configuration dictionary. If the module is not
+  found, this dictionary will be empty.
+ """ + for path in path_hints: + candidate = os.path.join(path, "project-checkin-test-config.py") + if debugDump: print "\nLooking for candidate configuration file '%s'" % candidate + if os.path.exists(candidate): + return loadConfigurationFile(candidate) + return {} + + +# +# Main +# + +def main(cmndLineArgs): + + # See if the help option is set or not + helpOpt = len( set(cmndLineArgs) & set(("--help", "-h")) ) > 0 + + # See if --show-defaults was set or not + showDefaultsOpt = len( set(cmndLineArgs) & set(("--show-defaults", "dummy")) ) > 0 + + if (not helpOpt) and (not showDefaultsOpt): + logFile = file("checkin-test.out", "w") + else: + logFile = None + + # There are a lot of print statements in the implementation. It's + # easier to reset sys.stdout and sys.stderr to a TeeOutput object + # than to replace them. + teeOutput = TeeOutput(logFile) + originalStdout = sys.stdout + originalStderr = sys.stderr + try: + sys.stdout = teeOutput + sys.stderr = teeOutput + try: + # See if there is a configuration file override. + configuration = None + for arg in cmndLineArgs: + if arg.startswith('--project-configuration='): + print "Found configuration override %s..." % arg + configuration = loadConfigurationFile(arg.split('=')[1]) + elif not configuration and arg.startswith('--src-dir='): + configuration = locateAndLoadConfiguration([arg.split('=')[1]]) + if not configuration: + configuration = locateAndLoadConfiguration(getConfigurationSearchPaths()) + if debugDump: + print "\nConfiguration loaded from configuration file =", configuration + success = runProjectTestsWithCommandLineArgs(cmndLineArgs, configuration) + except SystemExit, e: + # In Python 2.4, SystemExit inherits Exception, but for proper exit + # behavior the SystemExit exception must propagate all the way to the top + # of the call stack. It cannot get handled by the catch Exception below. 
+      raise e
+    except Exception, e:
+      success = False
+      traceback.print_exc(file=teeOutput)
+  finally:
+    # Reset stdout and stderr
+    sys.stdout = originalStdout
+    sys.stderr = originalStderr
+
+  if success:
+    return 0
+  else:
+    return 1
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv[1:]))
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
new file mode 100644
index 000000000..92a0c72c4
--- /dev/null
+++ b/cmake/Dependencies.cmake
@@ -0,0 +1,3 @@
+TRIBITS_PACKAGE_DEFINE_DEPENDENCIES(
+  TEST_OPTIONAL_TPLS MPI
+  )
diff --git a/common_tools/cloc/README b/common_tools/cloc/README
new file mode 100644
index 000000000..3b4599004
--- /dev/null
+++ b/common_tools/cloc/README
@@ -0,0 +1,28 @@
+2012/02/15
+
+Included in this directory is a snapshot of the GPL Perl script cloc.pl. This
+is a source code line counting tool. Also contained in this directory are the
+files cloc.<language-set>.in that define specializations of the default cloc
+language definitions for TriBITS software. Adding a language definition file
+directly to TriBITS aids in trying to help standardize the names of language
+files a little.
+
+To run the tool on your project, do:
+
+  $ cd SOME_PROJECT
+  $ $TRIBITS_DIR/common_tools/cloc/cloc.pl \
+    --read-lang-def=<language-definition-file> \
+    .
+
+Here, the different language definition files are:
+
+  $TRIBITS_DIR/common_tools/cloc/cloc.core-languages.in
+  $TRIBITS_DIR/common_tools/cloc/cloc.script-languages.in
+
+Different TriBITS projects can copy these language definition files and define
+their own language standards. However, for the purposes of TriBITS meta build
+across projects, it would be beneficial if all TriBITS projects could use a
+single standard for language file extensions.
+
+This will print statistics to the screen. See cloc.pl --help for other
+useful options.
diff --git a/common_tools/cloc/cloc.core-langauges.in b/common_tools/cloc/cloc.core-langauges.in new file mode 100644 index 000000000..5b370b49a --- /dev/null +++ b/common_tools/cloc/cloc.core-langauges.in @@ -0,0 +1,74 @@ +C + filter remove_matches ^\s*// + filter call_regexp_common C + filter remove_inline //.*$ + extension c + extension ec + extension pgc + 3rd_gen_scale 0.77 + end_of_line_continuation \\$ +C Header + filter remove_matches ^\s*// + filter call_regexp_common C + filter remove_inline //.*$ + extension h + 3rd_gen_scale 1.00 + end_of_line_continuation \\$ +C++ + filter remove_matches ^\s*// + filter remove_inline //.*$ + filter call_regexp_common C + extension C + extension cc + extension cpp + extension cxx + extension pcc + 3rd_gen_scale 1.51 + end_of_line_continuation \\$ +C++ Header + filter remove_matches ^\s*// + filter call_regexp_common C + filter remove_inline //.*$ + extension H + extension hh + extension hpp + extension hxx + 3rd_gen_scale 1.51 + end_of_line_continuation \\$ +Fortran 77 + filter remove_f77_comments + filter remove_inline \!.*$ + extension F + extension F77 + extension f + extension f77 + extension pfo + 3rd_gen_scale 0.75 +Fortran 90 + filter remove_f77_comments + filter remove_f90_comments + filter remove_inline \!.*$ + extension F90 + extension f90 + 3rd_gen_scale 1.00 +Fortran 95 + filter remove_f77_comments + filter remove_f90_comments + filter remove_inline \!.*$ + extension F95 + extension f95 + 3rd_gen_scale 1.13 +Fortran 2003 + filter remove_f77_comments + filter remove_f90_comments + filter remove_inline \!.*$ + extension F03 + extension f03 + 3rd_gen_scale 1.13 +Java + filter remove_matches ^\s*// + filter call_regexp_common C + filter remove_inline //.*$ + extension java + 3rd_gen_scale 1.36 + end_of_line_continuation \\$ diff --git a/common_tools/cloc/cloc.pl b/common_tools/cloc/cloc.pl new file mode 100755 index 000000000..acddec1ea --- /dev/null +++ b/common_tools/cloc/cloc.pl @@ -0,0 +1,8264 @@ 
+#!/usr/bin/env perl +# cloc -- Count Lines of Code {{{1 +# Copyright (C) 2006-2011 Northrop Grumman Corporation +# Author: Al Danial +# First release August 2006 +# +# Includes code from: +# - SLOCCount v2.26 +# http://www.dwheeler.com/sloccount/ +# by David Wheeler. +# - Regexp::Common v2.120 +# http://search.cpan.org/~abigail/Regexp-Common-2.120/lib/Regexp/Common.pm +# by Damian Conway and Abigail. +# - Win32::Autoglob +# http://search.cpan.org/~sburke/Win32-Autoglob-1.01/Autoglob.pm +# by Sean M. Burke. +# - Algorithm::Diff +# http://search.cpan.org/~tyemq/Algorithm-Diff-1.1902/lib/Algorithm/Diff.pm +# by Tye McQueen. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details: +# http://www.gnu.org/licenses/gpl.txt +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +# 1}}} +my $VERSION = sprintf("%.2f", 1.55); +my $URL = "http://cloc.sourceforge.net"; +require 5.006; +# use modules {{{1 +use warnings; +use strict; +use Getopt::Long; +use File::Basename; +use File::Temp qw { tempfile tempdir }; +use File::Find; +use File::Path; +use File::Spec; +use IO::File; +use POSIX "strftime"; + +# Digest::MD5 isn't in the standard distribution. Use it only if installed. 
+my $HAVE_Digest_MD5 = 0; +eval "use Digest::MD5;"; +if (defined $Digest::MD5::VERSION) { + $HAVE_Digest_MD5 = 1; +} else { + warn "Digest::MD5 not installed; will skip file uniqueness checks.\n"; +} + +my $HAVE_Rexexp_Common; +# Regexp::Common isn't in the standard distribution. It will +# be installed in a temp directory if necessary. +BEGIN { + if (eval "use Regexp::Common;") { + $HAVE_Rexexp_Common = 1; + } else { + $HAVE_Rexexp_Common = 0; + } +} + +my $HAVE_Algorith_Diff = 0; +# Algorithm::Diff isn't in the standard distribution. It will +# be installed in a temp directory if necessary. +eval "use Algorithm::Diff qw ( sdiff ) "; +if (defined $Algorithm::Diff::VERSION) { + $HAVE_Algorith_Diff = 1; +} else { + Install_Algorithm_Diff(); +} +# print "2 HAVE_Algorith_Diff = $HAVE_Algorith_Diff\n"; +# test_alg_diff($ARGV[$#ARGV - 1], $ARGV[$#ARGV]); die; + +# Uncomment next two lines when building Windows executable with perl2exe +# or if running on a system that already has Regexp::Common. 
+#use Regexp::Common; +#$HAVE_Rexexp_Common = 1; + +#perl2exe_include "Regexp/Common/whitespace.pm" +#perl2exe_include "Regexp/Common/URI.pm" +#perl2exe_include "Regexp/Common/URI/fax.pm" +#perl2exe_include "Regexp/Common/URI/file.pm" +#perl2exe_include "Regexp/Common/URI/ftp.pm" +#perl2exe_include "Regexp/Common/URI/gopher.pm" +#perl2exe_include "Regexp/Common/URI/http.pm" +#perl2exe_include "Regexp/Common/URI/pop.pm" +#perl2exe_include "Regexp/Common/URI/prospero.pm" +#perl2exe_include "Regexp/Common/URI/news.pm" +#perl2exe_include "Regexp/Common/URI/tel.pm" +#perl2exe_include "Regexp/Common/URI/telnet.pm" +#perl2exe_include "Regexp/Common/URI/tv.pm" +#perl2exe_include "Regexp/Common/URI/wais.pm" +#perl2exe_include "Regexp/Common/CC.pm" +#perl2exe_include "Regexp/Common/SEN.pm" +#perl2exe_include "Regexp/Common/number.pm" +#perl2exe_include "Regexp/Common/delimited.pm" +#perl2exe_include "Regexp/Common/profanity.pm" +#perl2exe_include "Regexp/Common/net.pm" +#perl2exe_include "Regexp/Common/zip.pm" +#perl2exe_include "Regexp/Common/comment.pm" +#perl2exe_include "Regexp/Common/balanced.pm" +#perl2exe_include "Regexp/Common/lingua.pm" +#perl2exe_include "Regexp/Common/list.pm" +#perl2exe_include "File/Glob.pm" + +use Text::Tabs qw { expand }; +use Cwd qw { cwd }; +# 1}}} +# Usage information, options processing. {{{1 +my $ON_WINDOWS = 0; + $ON_WINDOWS = 1 if ($^O =~ /^MSWin/) or ($^O eq "Windows_NT"); +if ($ON_WINDOWS and $ENV{'SHELL'}) { + if ($ENV{'SHELL'} =~ m{^/}) { + $ON_WINDOWS = 1; # make Cygwin look like Unix + } else { + $ON_WINDOWS = 0; # MKS defines $SHELL but still acts like Windows + } +} + +my $NN = chr(27) . "[0m"; # normal + $NN = "" if $ON_WINDOWS; +my $BB = chr(27) . 
"[1m"; # bold + $BB = "" if $ON_WINDOWS; +my $script = basename $0; +my $usage = " +Usage: $script [options] | | + + Count, or compute differences of, physical lines of source code in the + given files (may be archives such as compressed tarballs or zip files) + and/or recursively below the given directories. + + ${BB}Input Options${NN} + --extract-with= This option is only needed if cloc is unable + to figure out how to extract the contents of + the input file(s) by itself. + Use to extract binary archive files (e.g.: + .tar.gz, .zip, .Z). Use the literal '>FILE<' as + a stand-in for the actual file(s) to be + extracted. For example, to count lines of code + in the input files + gcc-4.2.tar.gz perl-5.8.8.tar.gz + on Unix use + --extract-with='gzip -dc >FILE< | tar xf -' + or, if you have GNU tar, + --extract-with='tar zxf >FILE<' + and on Windows use: + --extract-with=\"\\\"c:\\Program Files\\WinZip\\WinZip32.exe\\\" -e -o >FILE< .\" + (if WinZip is installed there). + --list-file= Take the list of file and/or directory names to + process from which has one file/directory + name per line. See also --exclude-list-file. + --unicode Check binary files to see if they contain Unicode + expanded ASCII text. This causes performance to + drop noticably. + + ${BB}Processing Options${NN} + --autoconf Count .in files (as processed by GNU autoconf) of + recognized languages. + --by-file Report results for every source file encountered. + --by-file-by-lang Report results for every source file encountered + in addition to reporting by language. + --diff Compute differences in code and comments between + source file(s) of and . The inputs + may be pairs of files, directories, or archives. + Use --diff-alignment to generate a list showing + which file pairs where compared. See also + --ignore-case, --ignore-whitespace. + --follow-links [Unix only] Follow symbolic links to directories + (sym links to files are always followed). 
+ --force-lang=[,] + Process all files that have a extension + with the counter for language . For + example, to count all .f files with the + Fortran 90 counter (which expects files to + end with .f90) instead of the default Fortran 77 + counter, use + --force-lang=\"Fortran 90\",f + If is omitted, every file will be counted + with the counter. This option can be + specified multiple times (but that is only + useful when is given each time). + See also --script-lang, --lang-no-ext. + --ignore-whitespace Ignore horizontal white space when comparing files + with --diff. See also --ignore-case. + --ignore-case Ignore changes in case; consider upper- and lower- + case letters equivalent when comparing files with + --diff. See also --ignore-whitespace. + --lang-no-ext= Count files without extensions using the + counter. This option overrides internal logic + for files without extensions (where such files + are checked against known scripting languages + by examining the first line for #!). See also + --force-lang, --script-lang. + --read-binary-files Process binary files in addition to text files. + This is usually a bad idea and should only be + attempted with text files that have embedded + binary data. + --read-lang-def= Load from the language processing filters. + (see also --write-lang-def) then use these filters + instead of the built-in filters. + --script-lang=, Process all files that invoke as a #! + scripting language with the counter for language + . For example, files that begin with + #!/usr/local/bin/perl5.8.8 + will be counted with the Perl counter by using + --script-lang=Perl,perl5.8.8 + The language name is case insensitive but the + name of the script language executable, , + must have the right case. This option can be + specified multiple times. See also --force-lang, + --lang-no-ext. + --sdir= Use as the scratch directory instead of + letting File::Temp chose the location. 
Files + written to this location are not removed at + the end of the run (as they are with File::Temp). + --skip-uniqueness Skip the file uniqueness check. This will give + a performance boost at the expense of counting + files with identical contents multiple times + (if such duplicates exist). + --strip-comments= For each file processed, write to the current + directory a version of the file which has blank + lines and comments removed. The name of each + stripped file is the original file name with + . appended to it. It is written to the + current directory unless --original-dir is on. + --original-dir [Only effective in combination with + --strip-comments] Write the stripped files + to the same directory as the original files. + --sum-reports Input arguments are report files previously + created with the --report-file option. Makes + a cumulative set of results containing the + sum of data from the individual report files. + + ${BB}Filter Options${NN} + --exclude-dir=[,D2,] Exclude the given comma separated directories + D1, D2, D3, et cetera, from being scanned. For + example --exclude-dir=.cache,test will skip + all files that have /.cache/ or /test/ as part + of their path. + Directories named .cvs and .svn are always + excluded. + --exclude-ext=[,[...]] + Do not count files having the given file name + extensions. + --exclude-lang=[,L2,] Exclude the given comma separated languages + L1, L2, L3, et cetera, from being counted. + --exclude-list-file= Ignore files and/or directories whose names + appear in . should have one entry + per line. Relative path names will be resolved + starting from the directory where cloc is + invoked. See also --list-file. + --match-f= Only count files whose basenames match the Perl + regex. For example + --match-f=^[Ww]idget + only counts files that start with Widget or widget. + --not-match-f= Count all files except those whose basenames + match the Perl regex. 
+ --match-d= Only count files in directories matching the Perl + regex. For example + --match-d=/src/ + only counts files in directories containing + /src/ + --not-match-d= Count all files except those in directories + matching the Perl regex. + --skip-win-hidden On Windows, ignore hidden files. + + ${BB}Debug Options${NN} + --categorized= Save names of categorized files to . + --counted= Save names of processed source files to . + --diff-alignment= Write to a list of files and file pairs + showing which files were added, removed, and/or + compared during a run with --diff. This switch + forces the --diff mode on. + --help Print this usage information and exit. + --found= Save names of every file found to . + --ignored= Save names of ignored files and the reason they + were ignored to . + --print-filter-stages Print to STDOUT processed source code before and + after each filter is applied. + --show-ext[=] Print information about all known (or just the + given) file extensions and exit. + --show-lang[=] Print information about all known (or just the + given) languages and exit. + -v[=] Verbose switch (optional numeric value). + --version Print the version of this program and exit. + + --write-lang-def= Writes to the language processing filters + then exits. Useful as a first step to creating + custom language definitions (see --read-lang-def). + + ${BB}Output Options${NN} + --3 Print third-generation language output. + (This option can cause report summation to fail + if some reports were produced with this option + while others were produced without it.) + --progress-rate= Show progress update after every files are + processed (default =100). Set to 0 to + suppress progress output (useful when redirecting + output to STDOUT). + --quiet Suppress all information messages except for + the final report. + --report-file= Write the results to instead of STDOUT. + --out= Synonym for --report-file=. + --csv Write the results as comma separated values. 
+ --sql= Write results as SQL create and insert statements + which can be read by a database program such as + SQLite. If is 1, output is sent to STDOUT. + --sql-project= Use as the project identifier for the + current run. Only valid with the --sql option. + --sql-append Append SQL insert statements to the file specified + by --sql and do not generate table creation + statements. Only valid with the --sql option. + --sum-one For plain text reports, show the SUM: output line + even if only one input file is processed. + --xml Write the results in XML. + --xsl= Reference as an XSL stylesheet within + the XML output. If is 1 (numeric one), + writes a default stylesheet, cloc.xsl (or + cloc-diff.xsl if --diff is also given). + This switch forces --xml on. + --yaml Write the results in YAML. + +"; +# Help information for options not yet implemented: +# --inline Process comments that appear at the end +# of lines containing code. +# --html Create HTML files of each input file showing +# comment and code lines in different colors. 
+ +$| = 1; # flush STDOUT +my $start_time = time(); +my ( + $opt_categorized , + $opt_found , + @opt_force_lang , + $opt_lang_no_ext , + @opt_script_lang , + $opt_diff , + $opt_diff_alignment , + $opt_html , + $opt_ignored , + $opt_counted , + $opt_show_ext , + $opt_show_lang , + $opt_progress_rate , + $opt_print_filter_stages , + $opt_v , + $opt_version , + $opt_exclude_lang , + $opt_exclude_list_file , + $opt_exclude_dir , + $opt_read_lang_def , + $opt_write_lang_def , + $opt_strip_comments , + $opt_original_dir , + $opt_quiet , + $opt_report_file , + $opt_sdir , + $opt_sum_reports , + $opt_unicode , + $opt_no3 , # accept it but don't use it + $opt_3 , + $opt_extract_with , + $opt_by_file , + $opt_by_file_by_lang , + $opt_xml , + $opt_xsl , + $opt_yaml , + $opt_csv , + $opt_match_f , + $opt_not_match_f , + $opt_match_d , + $opt_not_match_d , + $opt_skip_uniqueness , + $opt_list_file , + $opt_help , + $opt_skip_win_hidden , + $opt_read_binary_files , + $opt_sql , + $opt_sql_append , + $opt_sql_project , + $opt_inline , + $opt_exclude_ext , + $opt_ignore_whitespace , + $opt_ignore_case , + $opt_follow_links , + $opt_autoconf , + $opt_sum_one , + ); +my $getopt_success = GetOptions( + "by_file|by-file" => \$opt_by_file , + "by_file_by_lang|by-file-by-lang" => \$opt_by_file_by_lang , + "categorized=s" => \$opt_categorized , + "counted=s" => \$opt_counted , + "exclude_lang|exclude-lang=s" => \$opt_exclude_lang , + "exclude_dir|exclude-dir=s" => \$opt_exclude_dir , + "exclude_list_file|exclude-list-file=s" => \$opt_exclude_list_file , + "extract_with|extract-with=s" => \$opt_extract_with , + "found=s" => \$opt_found , + "diff" => \$opt_diff , + "diff-alignment|diff_alignment=s" => \$opt_diff_alignment , + "html" => \$opt_html , + "ignored=s" => \$opt_ignored , + "quiet" => \$opt_quiet , + "read_lang_def|read-lang-def=s" => \$opt_read_lang_def , + "show_ext|show-ext:s" => \$opt_show_ext , + "show_lang|show-lang:s" => \$opt_show_lang , + "progress_rate|progress-rate=i" 
=> \$opt_progress_rate , + "print_filter_stages|print-filter-stages" => \$opt_print_filter_stages , + "report_file|report-file=s" => \$opt_report_file , + "out=s" => \$opt_report_file , + "script_lang|script-lang=s" => \@opt_script_lang , + "sdir=s" => \$opt_sdir , + "skip_uniqueness|skip-uniqueness" => \$opt_skip_uniqueness , + "strip_comments|strip-comments=s" => \$opt_strip_comments , + "original_dir|original-dir" => \$opt_original_dir , + "sum_reports|sum-reports" => \$opt_sum_reports , + "unicode" => \$opt_unicode , + "no3" => \$opt_no3 , # ignored + "3" => \$opt_3 , + "v:i" => \$opt_v , + "version" => \$opt_version , + "write_lang_def|write-lang-def=s" => \$opt_write_lang_def , + "xml" => \$opt_xml , + "xsl=s" => \$opt_xsl , + "force_lang|force-lang=s" => \@opt_force_lang , + "lang_no_ext|lang-no-ext=s" => \$opt_lang_no_ext , + "yaml" => \$opt_yaml , + "csv" => \$opt_csv , + "match_f|match-f=s" => \$opt_match_f , + "not_match_f|not-match-f=s" => \$opt_not_match_f , + "match_d|match-d=s" => \$opt_match_d , + "not_match_d|not-match-d=s" => \$opt_not_match_d , + "list_file|list-file=s" => \$opt_list_file , + "help" => \$opt_help , + "skip_win_hidden|skip-win-hidden" => \$opt_skip_win_hidden , + "read_binary_files|read-binary-files" => \$opt_read_binary_files , + "sql=s" => \$opt_sql , + "sql_project|sql-project=s" => \$opt_sql_project , + "sql_append|sql-append" => \$opt_sql_append , + "inline" => \$opt_inline , + "exclude_ext|exclude-ext=s" => \$opt_exclude_ext , + "ignore_whitespace|ignore-whitespace" => \$opt_ignore_whitespace , + "ignore_case|ignore-case" => \$opt_ignore_case , + "follow_links|follow-links" => \$opt_follow_links , + "autoconf" => \$opt_autoconf , + "sum_one|sum-one" => \$opt_sum_one , + ); +$opt_by_file = 1 if defined $opt_by_file_by_lang; +my $CLOC_XSL = "cloc.xsl"; # created with --xsl + $CLOC_XSL = "cloc-diff.xsl" if $opt_diff; +die "\n" unless $getopt_success; +die $usage if $opt_help; +my %Exclude_Language = (); + %Exclude_Language = 
map { $_ => 1 } split(/,/, $opt_exclude_lang) + if $opt_exclude_lang; +my %Exclude_Dir = (); + %Exclude_Dir = map { $_ => 1 } split(/,/, $opt_exclude_dir ) + if $opt_exclude_dir ; +# Forcibly exclude .svn, .cvs, .hg directories. The contents of these +# directories often conflict with files of interest. +$opt_exclude_dir = 1; +$Exclude_Dir{".svn"} = 1; +$Exclude_Dir{".cvs"} = 1; +$Exclude_Dir{".hg"} = 1; +$opt_diff = 1 if $opt_diff_alignment; +$opt_exclude_ext = "" unless $opt_exclude_ext; +$opt_ignore_whitespace = 0 unless $opt_ignore_whitespace; +$opt_ignore_case = 0 unless $opt_ignore_case; +$opt_lang_no_ext = 0 unless $opt_lang_no_ext; +$opt_follow_links = 0 unless $opt_follow_links; + +# Options defaults: +$opt_progress_rate = 100 unless defined $opt_progress_rate; +if (!defined $opt_v) { + $opt_v = 0; +} elsif (!$opt_v) { + $opt_v = 1; +} +if (defined $opt_xsl) { + $opt_xsl = $CLOC_XSL if $opt_xsl eq "1"; + $opt_xml = 1; +} +my $skip_generate_report = 0; +$opt_sql = 0 unless defined $opt_sql; +if ($opt_sql eq "1") { # stream SQL output to STDOUT + $opt_quiet = 1; + $skip_generate_report = 1; + $opt_by_file = 1; + $opt_sum_reports = 0; + $opt_progress_rate = 0; +} elsif ($opt_sql) { # write SQL output to a file + $opt_by_file = 1; + $skip_generate_report = 1; + $opt_sum_reports = 0; +} +die $usage unless defined $opt_version or + defined $opt_show_lang or + defined $opt_show_ext or + defined $opt_write_lang_def or + defined $opt_list_file or + defined $opt_xsl or + scalar @ARGV >= 1; +die "--diff requires at least two arguments\n" + if $opt_diff and scalar @ARGV < 2; +if ($opt_version) { + printf "$VERSION\n"; + exit; +} +# 1}}} +# Step 1: Initialize global constants. 
{{{1 +# +my $nFiles_Found = 0; # updated in make_file_list +my (%Language_by_Extension, %Language_by_Script, + %Filters_by_Language, %Not_Code_Extension, %Not_Code_Filename, + %Language_by_File, %Scale_Factor, %Known_Binary_Archives, + %EOL_Continuation_re, + ); +my $ALREADY_SHOWED_HEADER = 0; +my $ALREADY_SHOWED_XML_SECTION = 0; +my %Error_Codes = ( 'Unable to read' => -1, + 'Neither file nor directory' => -2, + 'Diff error (quoted comments?)' => -3, + ); +if ($opt_read_lang_def) { + read_lang_def( + $opt_read_lang_def , # Sample values: + \%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77' + \%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell' + \%Language_by_File , # Language_by_File{makefile} = 'make' + \%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] = + # [ 'remove_matches' , '^\s*#' ] + \%Not_Code_Extension , # Not_Code_Extension{jpg} = 1 + \%Not_Code_Filename , # Not_Code_Filename{README} = 1 + \%Scale_Factor , # Scale_Factor{Perl} = 4.0 + \%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$' + ); +} else { + set_constants( # + \%Language_by_Extension, # Language_by_Extension{f} = 'Fortran 77' + \%Language_by_Script , # Language_by_Script{sh} = 'Bourne Shell' + \%Language_by_File , # Language_by_File{makefile} = 'make' + \%Filters_by_Language , # Filters_by_Language{Bourne Shell}[0] = + # [ 'remove_matches' , '^\s*#' ] + \%Not_Code_Extension , # Not_Code_Extension{jpg} = 1 + \%Not_Code_Filename , # Not_Code_Filename{README} = 1 + \%Scale_Factor , # Scale_Factor{Perl} = 4.0 + \%Known_Binary_Archives, # Known_Binary_Archives{.tar} = 1 + \%EOL_Continuation_re , # EOL_Continuation_re{C++} = '\\$' + ); +} +if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) { + die_unknown_lang($opt_lang_no_ext, "--lang-no-ext") +} + +# Process command line provided extention-to-language mapping overrides. +# Make a hash of known languages in lower case for easier matching. 
+my %Recognized_Language_lc = (); # key = language name in lc, value = true name +foreach my $language (keys %Filters_by_Language) { + my $lang_lc = lc $language; + $Recognized_Language_lc{$lang_lc} = $language; +} +my %Forced_Extension = (); # file name extensions which user wants to count +my $All_One_Language = 0; # set to !0 if --force-lang's is missing +foreach my $pair (@opt_force_lang) { + my ($lang, $extension) = split(',', $pair); + my $lang_lc = lc $lang; + if (defined $extension) { + $Forced_Extension{$extension} = $lang; + + die_unknown_lang($lang, "--force-lang") + unless $Recognized_Language_lc{$lang_lc}; + + $Language_by_Extension{$extension} = $Recognized_Language_lc{$lang_lc}; + } else { + # the scary case--count everything as this language + $All_One_Language = $Recognized_Language_lc{$lang_lc}; + } +} + +foreach my $pair (@opt_script_lang) { + my ($lang, $script_name) = split(',', $pair); + my $lang_lc = lc $lang; + if (!defined $script_name) { + die "The --script-lang option requires a comma separated pair of ". + "strings.\n"; + } + + die_unknown_lang($lang, "--script-lang") + unless $Recognized_Language_lc{$lang_lc}; + + $Language_by_Script{$script_name} = $Recognized_Language_lc{$lang_lc}; +} + +# If user provided file extensions to ignore, add these to +# the exclusion list. +foreach my $ext (map { $_ => 1 } split(/,/, $opt_exclude_ext ) ) { + $ext = lc $ext if $ON_WINDOWS; + $Not_Code_Extension{$ext} = 1; +} + +# If SQL output is requested, keep track of directory names generated by +# File::Temp::tempdir and used to temporarily hold the results of compressed +# archives. Contents of the SQL table 't' will be much cleaner if these +# meaningless directory names are stripped from the front of files pulled +# from the archives. 
+my %TEMP_DIR = (); + +# invert %Language_by_Script hash to get an easy-to-look-up list of known +# scripting languages +my %Script_Language = map { $_ => 1 } values %Language_by_Script ; +# 1}}} +# Step 2: Early exits for display, summation. {{{1 +# +print_extension_info($opt_show_ext ) if defined $opt_show_ext ; +print_language_info( $opt_show_lang) if defined $opt_show_lang; +exit if (defined $opt_show_ext) or (defined $opt_show_lang); + +#print "Before glob have [", join(",", @ARGV), "]\n"; +@ARGV = windows_glob(@ARGV) if $ON_WINDOWS; +#print "after glob have [", join(",", @ARGV), "]\n"; + +if ($opt_sum_reports and $opt_diff) { + my @results = (); + if ($opt_list_file) { # read inputs from the list file + my @list = read_list_file($opt_list_file); + @results = combine_diffs(\@list); + } else { # get inputs from the command line + @results = combine_diffs(\@ARGV); + } + if ($opt_report_file) { + write_file($opt_report_file, @results); + } else { + print "\n", join("\n", @results), "\n"; + } + exit; +} +if ($opt_sum_reports) { + my %Results = (); + foreach my $type( "by language", "by report file" ) { + my $found_lang = undef; + if ($opt_list_file) { # read inputs from the list file + my @list = read_list_file($opt_list_file); + $found_lang = combine_results(\@list, + $type, + \%{$Results{ $type }}, + \%Filters_by_Language ); + } else { # get inputs from the command line + $found_lang = combine_results(\@ARGV, + $type, + \%{$Results{ $type }}, + \%Filters_by_Language ); + } + next unless %Results; + my $end_time = time(); + my @results = generate_report($VERSION, $end_time - $start_time, + $type, + \%{$Results{ $type }}, \%Scale_Factor); + if ($opt_report_file) { + my $ext = ".lang"; + $ext = ".file" unless $type eq "by language"; + next if !$found_lang and $ext eq ".lang"; + write_file($opt_report_file . 
$ext, @results); + } else { + print "\n", join("\n", @results), "\n"; + } + } + exit; +} +if ($opt_write_lang_def) { + write_lang_def($opt_write_lang_def , + \%Language_by_Extension, + \%Language_by_Script , + \%Language_by_File , + \%Filters_by_Language , + \%Not_Code_Extension , + \%Not_Code_Filename , + \%Scale_Factor , + \%EOL_Continuation_re , + ); + exit; +} +# 1}}} +# Step 3: Create a list of files to consider. {{{1 +# a) If inputs are binary archives, first cd to a temp +# directory, expand the archive with the user-given +# extraction tool, then add the temp directory to +# the list of dirs to process. +# b) Create a list of every file that might contain source +# code. Ignore binary files, zero-sized files, and +# any file in a directory the user says to exclude. +# c) Determine the language for each file in the list. +# +my @binary_archive = (); +my $cwd = cwd(); +if ($opt_extract_with) { +#print "cwd main = [$cwd]\n"; + my @extract_location = (); + foreach my $bin_file (@ARGV) { + my $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit + $TEMP_DIR{ $extract_dir } = 1 if $opt_sql; + print "mkdir $extract_dir\n" if $opt_v; + print "cd $extract_dir\n" if $opt_v; + chdir $extract_dir; + my $bin_file_full_path = ""; + if (File::Spec->file_name_is_absolute( $bin_file )) { + $bin_file_full_path = $bin_file; +#print "bin_file_full_path (was ful) = [$bin_file_full_path]\n"; + } else { + $bin_file_full_path = File::Spec->catfile( $cwd, $bin_file ); +#print "bin_file_full_path (was rel) = [$bin_file_full_path]\n"; + } + my $extract_cmd = uncompress_archive_cmd($bin_file_full_path); + print $extract_cmd, "\n" if $opt_v; + system $extract_cmd; + push @extract_location, $extract_dir; + chdir $cwd; + } + # It is possible that the binary archive itself contains additional + # files compressed the same way (true for Java .ear files). Go + # through all the files that were extracted, see if they are binary + # archives and try to extract them. 
Lather, rinse, repeat. + my $binary_archives_exist = 1; + my $count_binary_archives = 0; + my $previous_count = 0; + while ($binary_archives_exist) { + @binary_archive = (); + foreach my $dir (@extract_location) { + find(\&archive_files, $dir); # populates global @binary_archive + } + foreach my $archive (@binary_archive) { + my $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit + $TEMP_DIR{ $extract_dir } = 1 if $opt_sql; + print "mkdir $extract_dir\n" if $opt_v; + print "cd $extract_dir\n" if $opt_v; + chdir $extract_dir; + + my $extract_cmd = uncompress_archive_cmd($archive); + print $extract_cmd, "\n" if $opt_v; + system $extract_cmd; + push @extract_location, $extract_dir; + unlink $archive; # otherwise will be extracting it forever + } + $count_binary_archives = scalar @binary_archive; + if ($count_binary_archives == $previous_count) { + $binary_archives_exist = 0; + } + $previous_count = $count_binary_archives; + } + chdir $cwd; + + @ARGV = @extract_location; +} else { + # see if any of the inputs need to be auto-uncompressed &/or expanded + my @updated_ARGS = (); + foreach my $Arg (@ARGV) { + if (is_dir($Arg)) { + push @updated_ARGS, $Arg; + next; + } + my $full_path = ""; + if (File::Spec->file_name_is_absolute( $Arg )) { + $full_path = $Arg; + } else { + $full_path = File::Spec->catfile( $cwd, $Arg ); + } +#print "full_path = [$full_path]\n"; + my $extract_cmd = uncompress_archive_cmd($full_path); + if ($extract_cmd) { + my $extract_dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit + $TEMP_DIR{ $extract_dir } = 1 if $opt_sql; + print "mkdir $extract_dir\n" if $opt_v; + print "cd $extract_dir\n" if $opt_v; + chdir $extract_dir; + print $extract_cmd, "\n" if $opt_v; + system $extract_cmd; + push @updated_ARGS, $extract_dir; + chdir $cwd; + } else { + # this is a conventional, uncompressed, unarchived file + # or a directory; keep as-is + push @updated_ARGS, $Arg; + } + } + @ARGV = @updated_ARGS; +} +# 1}}} +my @Errors = (); +my @file_list = (); 
# global variable updated in files() +my %Ignored = (); # files that are not counted (language not recognized or + # problems reading the file) +my @Lines_Out = (); +if ($opt_diff) { +# Step 4: Separate code from non-code files. {{{1 +my @fh = (); +my @files_for_set = (); +# make file lists for each separate argument +if ($opt_exclude_list_file) { + process_exclude_list_file($opt_exclude_list_file, + \%Exclude_Dir, + \%Ignored); +} +for (my $i = 0; $i < scalar @ARGV; $i++) { + push @fh, + make_file_list([ $ARGV[$i] ], \%Error_Codes, \@Errors, \%Ignored); + @{$files_for_set[$i]} = @file_list; + @file_list = (); +} +# 1}}} +# Step 5: Remove duplicate files. {{{1 +# +my %Language = (); +my %unique_source_file = (); +my $n_set = 0; +foreach my $FH (@fh) { # loop over each pair of file sets + ++$n_set; + remove_duplicate_files($FH, + \%{$Language{$FH}} , + \%{$unique_source_file{$FH}} , + \%Error_Codes , + \@Errors , + \%Ignored ); + printf "%2d: %8d unique file%s. \r", + $n_set, + plural_form(scalar keys %unique_source_file) + unless $opt_quiet; +} +# 1}}} +# Step 6: Count code, comments, blank lines. {{{1 +# +my %Results_by_Language = (); +my %Results_by_File = (); +my %Delta_by_Language = (); +my %Delta_by_File = (); +my $nFiles_added = 0; +my $nFiles_removed = 0; +my $nFiles_modified = 0; +my $nFiles_same = 0; + +foreach (my $F = 0; $F < scalar @fh - 1; $F++) { + # loop over file sets; do diff between set $F to $F+1 + + my $nCounted = 0; + + my @file_pairs = (); + my @files_added = (); + my @files_removed = (); + + align_by_pairs(\%{$unique_source_file{$fh[$F ]}} , # in + \%{$unique_source_file{$fh[$F+1]}} , # in + \@files_added , # out + \@files_removed , # out + \@file_pairs , # out + ); + if (!@file_pairs) { + # Special case where all files were either added or deleted. + # In this case, one of these arrays will be empty: + # @files_added, @files_removed + # so loop over both to cover both cases. + my $status = @files_added ? 
'added' : 'removed'; + my $offset = @files_added ? 1 : 0 ; + foreach my $file (@files_added, @files_removed) { + my $Lang = $Language{$fh[$F+$offset]}{$file}; + my ($all_line_count, + $blank_count , + $comment_count , + ) = call_counter($file, $Lang, \@Errors); + my $code_count = $all_line_count-$blank_count-$comment_count; + if ($opt_by_file) { + $Delta_by_File{$file}{'code' }{$status} += $code_count ; + $Delta_by_File{$file}{'blank' }{$status} += $blank_count ; + $Delta_by_File{$file}{'comment'}{$status} += $comment_count; + $Delta_by_File{$file}{'lang' }{$status} = $Lang ; + $Delta_by_File{$file}{'nFiles' }{$status} += 1 ; + } + $Delta_by_Language{$Lang}{'code' }{$status} += $code_count ; + $Delta_by_Language{$Lang}{'blank' }{$status} += $blank_count ; + $Delta_by_Language{$Lang}{'comment'}{$status} += $comment_count; + } + } + #use Data::Dumper::Simple; + #use Data::Dumper; + #print Dumper(\@files_added, \@files_removed, \@file_pairs); + my @alignment = (); # only used if --diff-alignment +#print "after align_by_pairs:\n"; + +#print "added:\n"; + push @alignment, sprintf "Files added: %d\n", scalar @files_added + if $opt_diff_alignment; + foreach my $f (@files_added) { +#printf "%10s -> %s\n", $f, $Language{$fh[$F+1]}{$f}; + # Don't proceed unless the file (both L and R versions) + # is in a known language. 
+ next if $Language{$fh[$F+1]}{$f} eq "(unknown)"; + next if $Exclude_Language{$Language{$fh[$F+1]}{$f}}; + push @alignment, sprintf " + %s ; %s\n", $f, $Language{$fh[$F+1]}{$f} + if $opt_diff_alignment; + ++$Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'nFiles'}{'added'}; + # Additionally, add contents of file $f to + # Delta_by_File{$f}{comment/blank/code}{'added'} + # Delta_by_Language{$lang}{comment/blank/code}{'added'} + my ($all_line_count, + $blank_count , + $comment_count , + ) = call_counter($f, $Language{$fh[$F+1]}{$f}, \@Errors); + $Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'comment'}{'added'} += + $comment_count; + $Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'blank'}{'added'} += + $blank_count; + $Delta_by_Language{ $Language{$fh[$F+1]}{$f} }{'code'}{'added'} += + $all_line_count - $blank_count - $comment_count; + $Delta_by_File{ $f }{'comment'}{'added'} = $comment_count; + $Delta_by_File{ $f }{'blank'}{'added'} = $blank_count; + $Delta_by_File{ $f }{'code'}{'added'} = + $all_line_count - $blank_count - $comment_count; + } + push @alignment, "\n"; + +#print "removed:\n"; + push @alignment, sprintf "Files removed: %d\n", scalar @files_removed + if $opt_diff_alignment; + foreach my $f (@files_removed) { + # Don't proceed unless the file (both L and R versions) + # is in a known language. 
+ next if $Language{$fh[$F ]}{$f} eq "(unknown)"; + next if $Exclude_Language{$Language{$fh[$F ]}{$f}}; + ++$Delta_by_Language{ $Language{$fh[$F ]}{$f} }{'nFiles'}{'removed'}; + push @alignment, sprintf " - %s ; %s\n", $f, $Language{$fh[$F]}{$f} + if $opt_diff_alignment; +#printf "%10s -> %s\n", $f, $Language{$fh[$F ]}{$f}; + # Additionally, add contents of file $f to + # Delta_by_File{$f}{comment/blank/code}{'removed'} + # Delta_by_Language{$lang}{comment/blank/code}{'removed'} + my ($all_line_count, + $blank_count , + $comment_count , + ) = call_counter($f, $Language{$fh[$F ]}{$f}, \@Errors); + $Delta_by_Language{ $Language{$fh[$F ]}{$f} }{'comment'}{'removed'} += + $comment_count; + $Delta_by_Language{ $Language{$fh[$F ]}{$f} }{'blank'}{'removed'} += + $blank_count; + $Delta_by_Language{ $Language{$fh[$F ]}{$f} }{'code'}{'removed'} += + $all_line_count - $blank_count - $comment_count; + $Delta_by_File{ $f }{'comment'}{'removed'} = $comment_count; + $Delta_by_File{ $f }{'blank'}{'removed'} = $blank_count; + $Delta_by_File{ $f }{'code'}{'removed'} = + $all_line_count - $blank_count - $comment_count; + } + push @alignment, "\n"; + + push @alignment, sprintf "File pairs compared: %d\n", scalar @file_pairs + if $opt_diff_alignment; +#print "Language=\n", Dumper(\%Language); + foreach my $pair (@file_pairs) { + my $file_L = $pair->[0]; + my $file_R = $pair->[1]; + my $Lang_L = $Language{$fh[$F ]}{$file_L}; + my $Lang_R = $Language{$fh[$F+1]}{$file_R}; +#print "main step 6 file_L=$file_L file_R=$file_R\n"; + ++$nCounted; + printf "Counting: %d\r", $nCounted + unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate)); + next if $Ignored{$file_L}; + + # filter out excluded or unrecognized languages + if ($Exclude_Language{$Lang_L} or $Exclude_Language{$Lang_R}) { + $Ignored{$file_L} = "--exclude-lang=$Lang_L}"; + $Ignored{$file_R} = "--exclude-lang=$Lang_R}"; + next; + } + if (!defined @{$Filters_by_Language{$Lang_L} } or + !defined @{$Filters_by_Language{$Lang_R} 
} + ) { + if (($Lang_L eq "(unknown)") or ($Lang_R eq "(unknown)")) { + $Ignored{$fh[$F ]}{$file_L} = "language unknown (#1)"; + $Ignored{$fh[$F+1]}{$file_R} = "language unknown (#1)"; + } else { + $Ignored{$fh[$F ]}{$file_L} = "missing Filters_by_Language{$Lang_L}"; + $Ignored{$fh[$F+1]}{$file_R} = "missing Filters_by_Language{$Lang_R}"; + } + next; + } + +#print "DIFF($file_L, $file_R)\n"; + # step 0: compare the two files' contents + chomp ( my @lines_L = read_file($file_L) ); + chomp ( my @lines_R = read_file($file_R) ); + my $language_file_L = ""; + if (defined $Language{$fh[$F]}{$file_L}) { + $language_file_L = $Language{$fh[$F]}{$file_L}; + } else { + # files $file_L and $file_R do not contain known language + next; + } + my $contents_are_same = 1; + if (scalar @lines_L == scalar @lines_R) { + # same size, must compare line-by-line + for (my $i = 0; $i < scalar @lines_L; $i++) { + if ($lines_L[$i] ne $lines_R[$i]) { + $contents_are_same = 0; + last; + } + } + if ($contents_are_same) { + ++$Delta_by_Language{$language_file_L}{'nFiles'}{'same'}; + } else { + ++$Delta_by_Language{$language_file_L}{'nFiles'}{'modified'}; + } + } else { + $contents_are_same = 0; + # different sizes, contents have changed + ++$Delta_by_Language{$language_file_L}{'nFiles'}{'modified'}; + } + if ($opt_diff_alignment) { + my $str = "$file_L | $file_R ; $language_file_L"; + if ($contents_are_same) { + push @alignment, " == $str"; + } else { + push @alignment, " != $str"; + } + } + + # step 1: identify comments in both files +#print "Diff blank removal L language= $Lang_L"; +#print " scalar(lines_L)=", scalar @lines_L, "\n"; + my @original_minus_blanks_L + = rm_blanks( \@lines_L, $Lang_L, \%EOL_Continuation_re); +#print "1: scalar(original_minus_blanks_L)=", scalar @original_minus_blanks_L, "\n"; + @lines_L = @original_minus_blanks_L; +#print "2: scalar(lines_L)=", scalar @lines_L, "\n"; + @lines_L = add_newlines(\@lines_L); # compensate for rm_comments() + @lines_L = rm_comments( 
\@lines_L, $Lang_L, $file_L, + \%EOL_Continuation_re); +#print "3: scalar(lines_L)=", scalar @lines_L, "\n"; + +#print "Diff blank removal R language= $Lang_R\n"; + my @original_minus_blanks_R + = rm_blanks( \@lines_R, $Lang_R, \%EOL_Continuation_re); + @lines_R = @original_minus_blanks_R; + @lines_R = add_newlines(\@lines_R); # taken away by rm_comments() + @lines_R = rm_comments( \@lines_R, $Lang_R, $file_R, + \%EOL_Continuation_re); + + my (@diff_LL, @diff_LR, ); + array_diff( $file_L , # in + \@original_minus_blanks_L , # in + \@lines_L , # in + "comment" , # in + \@diff_LL, \@diff_LR , # out + \@Errors); # in/out + + my (@diff_RL, @diff_RR, ); + array_diff( $file_R , # in + \@original_minus_blanks_R , # in + \@lines_R , # in + "comment" , # in + \@diff_RL, \@diff_RR , # out + \@Errors); # in/out + # each line of each file is now classified as + # code or comment + +#use Data::Dumper; +#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, ); +#print Dumper("diff_RL", \@diff_RL, "diff_RR", \@diff_RR, ); +#die; + # step 2: separate code from comments for L and R files + my @code_L = (); + my @code_R = (); + my @comm_L = (); + my @comm_R = (); + foreach my $line_info (@diff_LL) { + if ($line_info->{'type'} eq "code" ) { + push @code_L, $line_info->{char}; + } elsif ($line_info->{'type'} eq "comment") { + push @comm_L, $line_info->{char}; + } else { + die "Diff unexpected line type ", + $line_info->{'type'}, "for $file_L line ", + $line_info->{'lnum'}; + } + } + foreach my $line_info (@diff_RL) { + if ($line_info->{type} eq "code" ) { + push @code_R, $line_info->{'char'}; + } elsif ($line_info->{type} eq "comment") { + push @comm_R, $line_info->{'char'}; + } else { + die "Diff unexpected line type ", + $line_info->{'type'}, "for $file_R line ", + $line_info->{'lnum'}; + } + } + + if ($opt_ignore_whitespace) { + # strip all whitespace from each line of source code + # and comments then use these stripped arrays in diffs + foreach (@code_L) { s/\s+//g } + foreach 
(@code_R) { s/\s+//g } + foreach (@comm_L) { s/\s+//g } + foreach (@comm_R) { s/\s+//g } + } + if ($opt_ignore_case) { + # change all text to lowercase in diffs + foreach (@code_L) { $_ = lc } + foreach (@code_R) { $_ = lc } + foreach (@comm_L) { $_ = lc } + foreach (@comm_R) { $_ = lc } + } + # step 3: compute code diffs + array_diff("$file_L v. $file_R" , # in + \@code_L , # in + \@code_R , # in + "revision" , # in + \@diff_LL, \@diff_LR , # out + \@Errors); # in/out +#print Dumper("diff_LL", \@diff_LL, "diff_LR", \@diff_LR, ); +#print Dumper("diff_LR", \@diff_LR); + foreach my $line_info (@diff_LR) { + my $status = $line_info->{'desc'}; # same|added|removed|modified + ++$Delta_by_Language{$Lang_L}{'code'}{$status}; + if ($opt_by_file) { + ++$Delta_by_File{$file_L}{'code'}{$status}; + } + } +#use Data::Dumper; +#print Dumper("code diffs:", \@diff_LL, \@diff_LR); + + # step 4: compute comment diffs + array_diff("$file_L v. $file_R" , # in + \@comm_L , # in + \@comm_R , # in + "revision" , # in + \@diff_LL, \@diff_LR , # out + \@Errors); # in/out +#print Dumper("comment diff_LR", \@diff_LR); + foreach my $line_info (@diff_LR) { + my $status = $line_info->{'desc'}; # same|added|removed|modified + ++$Delta_by_Language{$Lang_L}{'comment'}{$status}; + if ($opt_by_file) { + ++$Delta_by_File{$file_L}{'comment'}{$status}; + } + } +#print Dumper("comment diffs:", \@diff_LL, \@diff_LR); +#die; here= need to save original line number in diff result for html display + + # step 5: compute difference in blank lines (kind of pointless) + my ($all_line_count_L, + $blank_count_L , + $comment_count_L , + ) = call_counter($file_L, $Lang_L, \@Errors); + + my ($all_line_count_R, + $blank_count_R , + $comment_count_R , + ) = call_counter($file_R, $Lang_R, \@Errors); + + if ($blank_count_L < $blank_count_R) { + my $D = $blank_count_R - $blank_count_L; + $Delta_by_Language{$Lang_L}{'blank'}{'added'} += $D; + } else { + my $D = $blank_count_L - $blank_count_R; + 
$Delta_by_Language{$Lang_L}{'blank'}{'removed'} += $D; + } + if ($opt_by_file) { + if ($blank_count_L < $blank_count_R) { + my $D = $blank_count_R - $blank_count_L; + $Delta_by_File{$file_L}{'blank'}{'added'} += $D; + } else { + my $D = $blank_count_L - $blank_count_R; + $Delta_by_File{$file_L}{'blank'}{'removed'} += $D; + } + } + + my $code_count_L = $all_line_count_L-$blank_count_L-$comment_count_L; + if ($opt_by_file) { + $Results_by_File{$file_L}{'code' } = $code_count_L ; + $Results_by_File{$file_L}{'blank' } = $blank_count_L ; + $Results_by_File{$file_L}{'comment'} = $comment_count_L ; + $Results_by_File{$file_L}{'lang' } = $Lang_L ; + $Results_by_File{$file_L}{'nFiles' } = 1 ; + } else { + $Results_by_File{$file_L} = 1; # just keep track of counted files + } + + $Results_by_Language{$Lang_L}{'nFiles'}++; + $Results_by_Language{$Lang_L}{'code'} += $code_count_L ; + $Results_by_Language{$Lang_L}{'blank'} += $blank_count_L ; + $Results_by_Language{$Lang_L}{'comment'} += $comment_count_L; + } + write_file($opt_diff_alignment, @alignment) if $opt_diff_alignment; + +} +#use Data::Dumper; +#print Dumper("Delta_by_Language:" , \%Delta_by_Language); +#print Dumper("Results_by_Language:", \%Results_by_Language); +#print Dumper("Delta_by_File:" , \%Delta_by_File); +#print Dumper("Results_by_File:" , \%Results_by_File); +#die; +my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored; +write_file($opt_ignored, @ignored_reasons ) if $opt_ignored; +write_file($opt_counted, sort keys %Results_by_File) if $opt_counted; +# 1}}} +# Step 7: Assemble results. {{{1 +# +my $end_time = time(); +printf "%8d file%s ignored. 
\n", + plural_form(scalar keys %Ignored) unless $opt_quiet; +print_errors(\%Error_Codes, \@Errors) if @Errors; +if (!%Delta_by_Language) { + print "Nothing to count.\n"; + exit; +} + +if ($opt_by_file) { + @Lines_Out = diff_report($VERSION, time() - $start_time, + "by file", + \%Delta_by_File, \%Scale_Factor); +} else { + @Lines_Out = diff_report($VERSION, time() - $start_time, + "by language", + \%Delta_by_Language, \%Scale_Factor); +} + +# 1}}} +} else { +# Step 4: Separate code from non-code files. {{{1 +my $fh = 0; +if ($opt_list_file) { + my @list = read_list_file($opt_list_file); + $fh = make_file_list(\@list, \%Error_Codes, \@Errors, \%Ignored); +} else { + $fh = make_file_list(\@ARGV, \%Error_Codes, \@Errors, \%Ignored); + # make_file_list populates global variable @file_list via call to + # File::Find's find() which in turn calls files() +} +if ($opt_exclude_list_file) { + process_exclude_list_file($opt_exclude_list_file, + \%Exclude_Dir, + \%Ignored); +} +if ($opt_skip_win_hidden and $ON_WINDOWS) { + my @file_list_minus_hidded = (); + # eval code to run on Unix without 'missing Win32::File module' error. + my $win32_file_invocation = ' + use Win32::File; + foreach my $F (@file_list) { + my $attr = undef; + Win32::File::GetAttributes($F, $attr); + if ($attr & HIDDEN) { + $Ignored{$F} = "Windows hidden file"; + print "Ignoring $F since it is a Windows hidden file\n" + if $opt_v > 1; + } else { + push @file_list_minus_hidded, $F; + } + }'; + eval $win32_file_invocation; + @file_list = @file_list_minus_hidded; +} +#printf "%8d file%s excluded. \n", +# plural_form(scalar keys %Ignored) +# unless $opt_quiet; +# die print ": ", join("\n: ", @file_list), "\n"; +# 1}}} +# Step 5: Remove duplicate files. {{{1 +# +my %Language = (); +my %unique_source_file = (); +remove_duplicate_files($fh , # in + \%Language , # out + \%unique_source_file , # out + \%Error_Codes , # in + \@Errors , # out + \%Ignored ); # out +printf "%8d unique file%s. 
\n", + plural_form(scalar keys %unique_source_file) + unless $opt_quiet; +# 1}}} +# Step 6: Count code, comments, blank lines. {{{1 +# + +my %Results_by_Language = (); +my %Results_by_File = (); +my $nCounted = 0; +foreach my $file (sort keys %unique_source_file) { + ++$nCounted; + printf "Counting: %d\r", $nCounted + unless (!$opt_progress_rate or ($nCounted % $opt_progress_rate)); + next if $Ignored{$file}; + if ($Exclude_Language{$Language{$file}}) { + $Ignored{$file} = "--exclude-lang=$Language{$file}"; + next; + } + if (!defined @{$Filters_by_Language{$Language{$file}} }) { + if ($Language{$file} eq "(unknown)") { + $Ignored{$file} = "language unknown (#1)"; + } else { + $Ignored{$file} = "missing Filters_by_Language{$Language{$file}}"; + } + next; + } + + my ($all_line_count, + $blank_count , + $comment_count , + ) = call_counter($file, $Language{$file}, \@Errors); + my $code_count = $all_line_count - $blank_count - $comment_count; + if ($opt_by_file) { + $Results_by_File{$file}{'code' } = $code_count ; + $Results_by_File{$file}{'blank' } = $blank_count ; + $Results_by_File{$file}{'comment'} = $comment_count ; + $Results_by_File{$file}{'lang' } = $Language{$file}; + $Results_by_File{$file}{'nFiles' } = 1; + } else { + $Results_by_File{$file} = 1; # just keep track of counted files + } + + $Results_by_Language{$Language{$file}}{'nFiles'}++; + $Results_by_Language{$Language{$file}}{'code'} += $code_count ; + $Results_by_Language{$Language{$file}}{'blank'} += $blank_count ; + $Results_by_Language{$Language{$file}}{'comment'} += $comment_count; +} +my @ignored_reasons = map { "$_: $Ignored{$_}" } sort keys %Ignored; +write_file($opt_ignored, @ignored_reasons ) if $opt_ignored; +write_file($opt_counted, sort keys %Results_by_File) if $opt_counted; +# 1}}} +# Step 7: Assemble results. 
{{{1 +# +my $end_time = time(); +printf "%8d file%s ignored.\n", plural_form(scalar keys %Ignored) + unless $opt_quiet; +print_errors(\%Error_Codes, \@Errors) if @Errors; +exit unless %Results_by_Language; + +generate_sql($end_time - $start_time, + \%Results_by_File, \%Scale_Factor) if $opt_sql; + +exit if $skip_generate_report; +if ($opt_by_file_by_lang) { + push @Lines_Out, generate_report( $VERSION, $end_time - $start_time, + "by file", + \%Results_by_File, \%Scale_Factor); + push @Lines_Out, generate_report( $VERSION, $end_time - $start_time, + "by language", + \%Results_by_Language, \%Scale_Factor); +} elsif ($opt_by_file) { + push @Lines_Out, generate_report( $VERSION, $end_time - $start_time, + "by file", + \%Results_by_File, \%Scale_Factor); +} else { + push @Lines_Out, generate_report( $VERSION, $end_time - $start_time, + "by language", + \%Results_by_Language, \%Scale_Factor); +} +# 1}}} +} +if ($opt_report_file) { write_file($opt_report_file, @Lines_Out); } +else { print "\n", join("\n", @Lines_Out), "\n"; } + +sub process_exclude_list_file { # {{{1 + my ($list_file , # in + $rh_exclude_dir , # out + $rh_ignored , # out + ) = @_; + print "-> process_exclude_list_file($list_file)\n" if $opt_v > 2; + # reject a specific set of files and/or directories + my @reject_list = read_list_file($list_file); + my @file_reject_list = (); + foreach my $F_or_D (@reject_list) { + if (is_dir($F_or_D)) { + $rh_exclude_dir->{$F_or_D} = 1; + } elsif (is_file($F_or_D)) { + push @file_reject_list, $F_or_D; + } + } + + # Normalize file names for better comparison. 
+ my %normalized_input = normalize_file_names(@file_list); + my %normalized_reject = normalize_file_names(@file_reject_list); + my %normalized_exclude = normalize_file_names(keys %{$rh_exclude_dir}); + foreach my $F (keys %normalized_input) { + if ($normalized_reject{$F} or is_excluded($F, \%normalized_exclude)) { + my $orig_F = $normalized_input{$F}; + $rh_ignored->{$orig_F} = "listed in exclusion file $opt_exclude_list_file"; + print "Ignoring $orig_F because it appears in $opt_exclude_list_file\n" + if $opt_v > 1; + } + } + print "<- process_exclude_list_file\n" if $opt_v > 2; +} # 1}}} +sub combine_results { # {{{1 + # returns 1 if the inputs are categorized by language + # 0 if no identifiable language was found + my ($ra_report_files, # in + $report_type , # in "by language" or "by report file" + $rhh_count , # out count{TYPE}{nFiles|code|blank|comment|scaled} + $rhaa_Filters_by_Language , # in + ) = @_; + + print "-> combine_results(report_type=$report_type)\n" if $opt_v > 2; + my $found_language = 0; + + foreach my $file (@{$ra_report_files}) { + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + next; + } + while (<$IN>) { + next if /^(http|Language|SUM|-----)/; + if (!$opt_by_file and + m{^(.*?)\s+ # language + (\d+)\s+ # files + (\d+)\s+ # blank + (\d+)\s+ # comments + (\d+)\s+ # code + ( # next four entries missing with -nno3 + x\s+ # x + \d+\.\d+\s+ # scale + =\s+ # = + (\d+\.\d+)\s* # scaled code + )? + $}x) { + if ($report_type eq "by language") { + next unless defined @{$rhaa_Filters_by_Language->{$1}}; + # above test necessary to avoid trying to sum reports + # of reports (which have no language breakdown). 
+ $found_language = 1; + $rhh_count->{$1 }{'nFiles' } += $2; + $rhh_count->{$1 }{'blank' } += $3; + $rhh_count->{$1 }{'comment'} += $4; + $rhh_count->{$1 }{'code' } += $5; + $rhh_count->{$1 }{'scaled' } += $7 if $opt_3; + } else { + $rhh_count->{$file}{'nFiles' } += $2; + $rhh_count->{$file}{'blank' } += $3; + $rhh_count->{$file}{'comment'} += $4; + $rhh_count->{$file}{'code' } += $5; + $rhh_count->{$file}{'scaled' } += $7 if $opt_3; + } + } elsif ($opt_by_file and + m{^(.*?)\s+ # language + (\d+)\s+ # blank + (\d+)\s+ # comments + (\d+)\s+ # code + ( # next four entries missing with -nno3 + x\s+ # x + \d+\.\d+\s+ # scale + =\s+ # = + (\d+\.\d+)\s* # scaled code + )? + $}x) { + if ($report_type eq "by language") { + next unless %{$rhaa_Filters_by_Language->{$1}}; + # above test necessary to avoid trying to sum reports + # of reports (which have no language breakdown). + $found_language = 1; + $rhh_count->{$1 }{'nFiles' } += 1; + $rhh_count->{$1 }{'blank' } += $2; + $rhh_count->{$1 }{'comment'} += $3; + $rhh_count->{$1 }{'code' } += $4; + $rhh_count->{$1 }{'scaled' } += $6 if $opt_3; + } else { + $rhh_count->{$file}{'nFiles' } += 1; + $rhh_count->{$file}{'blank' } += $2; + $rhh_count->{$file}{'comment'} += $3; + $rhh_count->{$file}{'code' } += $4; + $rhh_count->{$file}{'scaled' } += $6 if $opt_3; + } + } + } + } + print "<- combine_results\n" if $opt_v > 2; + return $found_language; + +} # 1}}} +sub diff_report { # {{{1 + # returns an array of lines containing the results + print "-> diff_report\n" if $opt_v > 2; + + if ($opt_xml or $opt_yaml) { + print "<- diff_report\n" if $opt_v > 2; + return diff_xml_yaml_report(@_) + } elsif ($opt_csv) { + print "<- diff_report\n" if $opt_v > 2; + return diff_csv_report(@_) + } + + my ($version , # in + $elapsed_sec, # in + $report_type, # in "by language" | "by report file" | "by file" + $rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s} + $rh_scale , # in + ) = @_; + +#print "diff_report: ", 
Dumper($rhhh_count), "\n"; + my @results = (); + + my $languages = (); + my %sum = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed} + my $max_len = 0; + foreach my $language (keys %{$rhhh_count}) { + foreach my $V (qw(nFiles blank comment code)) { + foreach my $S (qw(added same modified removed)) { + $rhhh_count->{$language}{$V}{$S} = 0 unless + defined $rhhh_count->{$language}{$V}{$S}; + $sum{$V}{$S} += $rhhh_count->{$language}{$V}{$S}; + } + } + $max_len = length($language) if length($language) > $max_len; + } + my $column_1_offset = 0; + $column_1_offset = $max_len - 17 if $max_len > 17; + $elapsed_sec = 0.5 unless $elapsed_sec; + + my $spacing_0 = 23; + my $spacing_1 = 13; + my $spacing_2 = 9; + my $spacing_3 = 17; + if (!$opt_3) { + $spacing_1 = 19; + $spacing_2 = 14; + $spacing_3 = 28; + } + $spacing_0 += $column_1_offset; + $spacing_1 += $column_1_offset; + $spacing_3 += $column_1_offset; + my %Format = ( + '1' => { 'xml' => 'name="%s" ', + 'txt' => "\%-${spacing_0}s ", + }, + '2' => { 'xml' => 'name="%s" ', + 'txt' => "\%-${spacing_3}s ", + }, + '3' => { 'xml' => 'files_count="%d" ', + 'txt' => '%5d ', + }, + '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ', + 'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d", + }, + '5' => { 'xml' => 'factor="%.2f" scaled="%.2f" ', + 'txt' => ' x %6.2f = %14.2f', + }, + ); + my $Style = "txt"; + $Style = "xml" if $opt_xml ; + $Style = "xml" if $opt_yaml; # not a typo; just set to anything but txt + $Style = "xml" if $opt_csv ; # not a typo; just set to anything but txt + + my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset); + $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset) + if (!$opt_3) and (68 + $column_1_offset) > 79; + my $data_line = ""; + my $first_column; + my $BY_LANGUAGE = 0; + my $BY_FILE = 0; + if ($report_type eq "by language") { + $first_column = "Language"; + $BY_LANGUAGE = 1; + } elsif ($report_type eq "by file") { + $first_column = "File"; + $BY_FILE = 
1; + } else { + $first_column = "Report File"; + } + + my $header_line = sprintf "%s v %4.2f", $URL, $version; + my $sum_files = 1; + my $sum_lines = 1; + $header_line .= sprintf(" T=%.1f s (%.1f files/s, %.1f lines/s)", + $elapsed_sec , + $sum_files/$elapsed_sec, + $sum_lines/$elapsed_sec) unless $opt_sum_reports; + if ($Style eq "txt") { + push @results, output_header($header_line, $hyphen_line, $BY_FILE); + } elsif ($Style eq "csv") { + die "csv"; + } + + # column headers + if (!$opt_3 and $BY_FILE) { + my $spacing_n = $spacing_1 - 11; + $data_line = sprintf "%-${spacing_n}s" , $first_column; + } else { + $data_line = sprintf "%-${spacing_1}s ", $first_column; + } + if ($BY_FILE) { + $data_line .= sprintf "%${spacing_2}s" , "" ; + } else { + $data_line .= sprintf "%${spacing_2}s " , "files"; + } + $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s", + "blank" , + "comment" , + "code"; + + if ($Style eq "txt") { + push @results, $data_line; + push @results, $hyphen_line; + } + + foreach my $lang_or_file (sort { + $rhhh_count->{$b}{'code'} <=> + $rhhh_count->{$a}{'code'} + } + keys %{$rhhh_count}) { + + push @results, "$lang_or_file"; + foreach my $S (qw(same modified added removed)) { + my $indent = $spacing_1 - 2; + my $line .= sprintf " %-${indent}s", $S; + if ($BY_FILE) { + $line .= sprintf " "; + } else { + $line .= sprintf " %${spacing_2}s", $rhhh_count->{$lang_or_file}{'nFiles'}{$S}; + } + $line .= sprintf " %${spacing_2}s %${spacing_2}s %${spacing_2}s", + $rhhh_count->{$lang_or_file}{'blank'}{$S} , + $rhhh_count->{$lang_or_file}{'comment'}{$S} , + $rhhh_count->{$lang_or_file}{'code'}{$S} ; + push @results, $line; + } + } + push @results, "-" x 79; + push @results, "SUM:"; + foreach my $S (qw(same modified added removed)) { + my $indent = $spacing_1 - 2; + my $line .= sprintf " %-${indent}s", $S; + if ($BY_FILE) { + $line .= sprintf " "; + } else { + $line .= sprintf " %${spacing_2}s", $sum{'nFiles'}{$S}; + } + $line .= sprintf " 
%${spacing_2}s %${spacing_2}s %${spacing_2}s", + $sum{'blank'}{$S} , + $sum{'comment'}{$S} , + $sum{'code'}{$S} ; + push @results, $line; + } + push @results, "-" x 79; + write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL; + print "<- diff_report\n" if $opt_v > 2; + + return @results; +} # 1}}} +sub xml_or_yaml_header { # {{{1 + my ($URL, $version, $elapsed_sec, $sum_files, $sum_lines) = @_; + my $header = ""; + my $file_rate = $sum_files/$elapsed_sec; + my $line_rate = $sum_lines/$elapsed_sec; + my $type = ""; + $type = "diff_" if $opt_diff; + my $report_file = ""; + $report_file = " $opt_report_file" + if $opt_report_file; + if ($opt_xml) { + $header = ""; + $header .= "\n" if $opt_xsl; + $header .= "<${type}results> +
+ $URL + $version + $elapsed_sec + $sum_files + $sum_lines + $file_rate + $line_rate"; + $header .= "\n$report_file" + if $opt_report_file; + $header .= "\n
"; + } elsif ($opt_yaml) { + $header = "---\n# $URL +header : + cloc_url : http://cloc.sourceforge.net + cloc_version : $version + elapsed_seconds : $elapsed_sec + n_files : $sum_files + n_lines : $sum_lines + files_per_second : $file_rate + lines_per_second : $line_rate"; + $header .= "\n report_file : $opt_report_file" + if $opt_report_file; + } + return $header; +} # 1}}} +sub diff_xml_yaml_report { # {{{1 + # returns an array of lines containing the results + my ($version , # in + $elapsed_sec, # in + $report_type, # in "by language" | "by report file" | "by file" + $rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s} + $rh_scale , # in + ) = @_; + print "-> diff_xml_yaml_report\n" if $opt_v > 2; + +#print "diff_report: ", Dumper($rhhh_count), "\n"; + my @results = (); + + my $languages = (); + my %sum = (); # sum{nFiles|blank|comment|code}{same|modified|added|removed} + + my $sum_files = 0; + my $sum_lines = 0; + foreach my $language (keys %{$rhhh_count}) { + foreach my $V (qw(nFiles blank comment code)) { + foreach my $S (qw(added same modified removed)) { + $rhhh_count->{$language}{$V}{$S} = 0 unless + defined $rhhh_count->{$language}{$V}{$S}; + $sum{$V}{$S} += $rhhh_count->{$language}{$V}{$S}; + if ($V eq "nFiles") { + $sum_files += $rhhh_count->{$language}{$V}{$S}; + } else { + $sum_lines += $rhhh_count->{$language}{$V}{$S}; + } + } + } + } + $elapsed_sec = 0.5 unless $elapsed_sec; + + my $data_line = ""; + my $BY_LANGUAGE = 0; + my $BY_FILE = 0; + if ($report_type eq "by language") { + $BY_LANGUAGE = 1; + } elsif ($report_type eq "by file") { + $BY_FILE = 1; + } + + if (!$ALREADY_SHOWED_HEADER) { + push @results, + xml_or_yaml_header($URL, $version, $elapsed_sec, + $sum_files, $sum_lines); + $ALREADY_SHOWED_HEADER = 1; + } + + foreach my $S (qw(same modified added removed)) { + if ($opt_xml) { + push @results, " <$S>"; + } elsif ($opt_yaml) { + push @results, "$S :"; + } + foreach my $lang_or_file (sort { + $rhhh_count->{$b}{'code'} <=> + 
$rhhh_count->{$a}{'code'} + } + keys %{$rhhh_count}) { + my $L = ""; + if ($opt_xml) { + if ($BY_FILE) { + $L .= sprintf " {$lang_or_file}{'nFiles'}{$S}; + } + foreach my $T (qw(blank comment code)) { + $L .= sprintf "%s=\"%d\" ", + $T, $rhhh_count->{$lang_or_file}{$T}{$S}; + } + push @results, $L . "/>"; + } elsif ($opt_yaml) { + if ($BY_FILE) { + push @results, sprintf " - file : %s", $lang_or_file; + push @results, sprintf " files_count : 1", + } else { + push @results, sprintf " - language : %s", $lang_or_file; + push @results, sprintf " files_count : %d", + $rhhh_count->{$lang_or_file}{'nFiles'}{$S}; + } + foreach my $T (qw(blank comment code)) { + push @results, sprintf " %s : %d", + $T, $rhhh_count->{$lang_or_file}{$T}{$S}; + } + } + } + + if ($opt_xml) { + my $L = sprintf " "; + push @results, " "; + } elsif ($opt_yaml) { + push @results, sprintf "%s_total :\n sum_files : %d", + $S, $sum{'nFiles'}{$S}; + foreach my $V (qw(blank comment code)) { + push @results, sprintf " %s : %d", $V, $sum{$V}{$S}; + } + } + } + + if ($opt_xml) { + push @results, ""; + } + write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL; + print "<- diff_xml_yaml_report\n" if $opt_v > 2; + return @results; +} # 1}}} +sub diff_csv_report { # {{{1 + # returns an array of lines containing the results + my ($version , # in + $elapsed_sec, # in + $report_type, # in "by language" | "by report file" | "by file" + $rhhh_count , # in count{TYPE}{nFiles|code|blank|comment}{a|m|r|s} + $rh_scale , # in unused + ) = @_; + print "-> diff_csv_report\n" if $opt_v > 2; + +#use Data::Dumper; +#print "diff_csv_report: ", Dumper($rhhh_count), "\n"; +#die; + my @results = (); + my $languages = (); + + my $data_line = ""; + my $BY_LANGUAGE = 0; + my $BY_FILE = 0; + if ($report_type eq "by language") { + $BY_LANGUAGE = 1; + } elsif ($report_type eq "by file") { + $BY_FILE = 1; + } + + $elapsed_sec = 0.5 unless $elapsed_sec; + + my $line = "Language, "; + $line = "File, " if $BY_FILE; + foreach my $item 
(qw(files blank comment code)) { + next if $BY_FILE and $item eq 'files'; + foreach my $symbol qw( == != + - ) { + $line .= "$symbol $item, "; + } + } + $line .= "\"$URL v $version T=$elapsed_sec s\""; + push @results, $line; + + foreach my $lang_or_file (sort { + $rhhh_count->{$b}{'code'} <=> + $rhhh_count->{$a}{'code'} + } + keys %{$rhhh_count}) { + $line = "$lang_or_file, "; + foreach my $item (qw(nFiles blank comment code)) { + next if $BY_FILE and $item eq 'nFiles'; + foreach my $symbol (qw(same modified added removed)) { + if (defined $rhhh_count->{$lang_or_file}{$item}{$symbol}) { + $line .= "$rhhh_count->{$lang_or_file}{$item}{$symbol}, "; + } else { + $line .= "0, "; + } + } + } + push @results, $line; + } + + print "<- diff_csv_report\n" if $opt_v > 2; + return @results; +} # 1}}} +sub generate_sql { # {{{1 + my ($elapsed_sec, # in + $rhh_count , # in count{TYPE}{lang|code|blank|comment|scaled} + $rh_scale , # in + ) = @_; + print "-> generate_sql\n" if $opt_v > 2; + + $opt_sql_project = cwd() unless defined $opt_sql_project; + $opt_sql_project =~ s{/}{\\}g if $ON_WINDOWS; + + my $schema = " +create table metadata ( -- $URL v $VERSION + timestamp text, + Project text, + elapsed_s real); +create table t ( + Project text , + Language text , + File text , + nBlank integer, + nComment integer, + nCode integer, + nScaled real ); +"; + $opt_sql = "-" if $opt_sql eq "1"; + + my $open_mode = ">"; + $open_mode = ">>" if $opt_sql_append; + + my $fh = new IO::File; # $opt_sql, "w"; + if (!$fh->open("${open_mode}${opt_sql}")) { + die "Unable to write to $opt_sql $!\n"; + } + print $fh $schema unless defined $opt_sql_append; + + print $fh "begin transaction;\n"; + printf $fh "insert into metadata values('%s', '%s', %f);\n", + strftime("%Y-%m-%d %H:%M:%S", localtime(time())), + $opt_sql_project, $elapsed_sec; + + my $nIns = 0; + foreach my $file (keys %{$rhh_count}) { + my $language = $rhh_count->{$file}{'lang'}; + my $clean_filename = $file; + # If necessary (that is, 
if the input contained an + # archive file [.tar.gz, etc]), strip the temporary + # directory name which was used to expand the archive + # from the file name. + foreach my $temp_d (keys %TEMP_DIR) { + if ($ON_WINDOWS) { + # \ -> / necessary to allow the next if test's + # m{} to work in the presence of spaces in file names + $temp_d =~ s{\\}{/}g; + $clean_filename =~ s{\\}{/}g; + } + if ($clean_filename =~ m{^$temp_d/}) { + $clean_filename =~ s{^$temp_d/}{}; + last; + } + } + $clean_filename =~ s{/}{\\}g if $ON_WINDOWS; # then go back from / to \ + printf $fh "insert into t values('%s', '%s', '%s', %d, %d, %d, %f);\n", + $opt_sql_project , + $language , + $clean_filename , + $rhh_count->{$file}{'blank'}, + $rhh_count->{$file}{'comment'}, + $rhh_count->{$file}{'code'} , + $rhh_count->{$file}{'code'}*$rh_scale->{$language}; + ++$nIns; + if (!($nIns % 10_000)) { + print $fh "commit;\n"; + print $fh "begin transaction;\n"; + } + } + print $fh "commit;\n"; + + $fh->close unless $opt_sql eq "-"; # don't try to close STDOUT + print "<- generate_sql\n" if $opt_v > 2; + + # sample query: + # + # select project, language, + # sum(nCode) as Code, + # sum(nComment) as Comments, + # sum(nBlank) as Blank, + # sum(nCode)+sum(nComment)+sum(nBlank) as All_Lines, + # 100.0*sum(nComment)/(sum(nCode)+sum(nComment)) as Comment_Pct + # from t group by Project, Language order by Project, Code desc; + # +} # 1}}} +sub output_header { # {{{1 + my ($header_line, + $hyphen_line, + $BY_FILE ,) = @_; + print "-> output_header\n" if $opt_v > 2; + my @R = (); + if ($opt_xml) { + if (!$ALREADY_SHOWED_XML_SECTION) { + push @R, ""; + push @R, '' if $opt_xsl; + push @R, ""; + push @R, "
$header_line
"; + $ALREADY_SHOWED_XML_SECTION = 1; + } + if ($BY_FILE) { + push @R, ""; + } else { + push @R, ""; + } + } elsif ($opt_yaml) { + push @R, "---\n# $header_line"; + } elsif ($opt_csv) { + # append the header to the end of the column headers + # to keep the output a bit cleaner from a spreadsheet + # perspective + } else { + if ($ALREADY_SHOWED_HEADER) { + push @R, ""; + } else { + push @R, $header_line; + $ALREADY_SHOWED_HEADER = 1; + } + push @R, $hyphen_line; + } + print "<- output_header\n" if $opt_v > 2; + return @R; +} # 1}}} +sub generate_report { # {{{1 + # returns an array of lines containing the results + my ($version , # in + $elapsed_sec, # in + $report_type, # in "by language" | "by report file" | "by file" + $rhh_count , # in count{TYPE}{nFiles|code|blank|comment|scaled} + $rh_scale , # in + ) = @_; + + print "-> generate_report\n" if $opt_v > 2; + my @results = (); + + my $languages = (); + + my $sum_files = 0; + my $sum_code = 0; + my $sum_blank = 0; + my $sum_comment = 0; + my $max_len = 0; + foreach my $language (keys %{$rhh_count}) { + $sum_files += $rhh_count->{$language}{'nFiles'} ; + $sum_blank += $rhh_count->{$language}{'blank'} ; + $sum_comment += $rhh_count->{$language}{'comment'}; + $sum_code += $rhh_count->{$language}{'code'} ; + $max_len = length($language) if length($language) > $max_len; + } + my $column_1_offset = 0; + $column_1_offset = $max_len - 17 if $max_len > 17; + my $sum_lines = $sum_blank + $sum_comment + $sum_code; + $elapsed_sec = 0.5 unless $elapsed_sec; + + my $spacing_0 = 23; + my $spacing_1 = 13; + my $spacing_2 = 9; + my $spacing_3 = 17; + if (!$opt_3) { + $spacing_1 = 19; + $spacing_2 = 14; + $spacing_3 = 28; + } + $spacing_0 += $column_1_offset; + $spacing_1 += $column_1_offset; + $spacing_3 += $column_1_offset; + my %Format = ( + '1' => { 'xml' => 'name="%s" ', + 'txt' => "\%-${spacing_0}s ", + }, + '2' => { 'xml' => 'name="%s" ', + 'txt' => "\%-${spacing_3}s ", + }, + '3' => { 'xml' => 'files_count="%d" ', + 'txt' 
=> '%5d ', + }, + '4' => { 'xml' => 'blank="%d" comment="%d" code="%d" ', + 'txt' => "\%${spacing_2}d \%${spacing_2}d \%${spacing_2}d", + }, + '5' => { 'xml' => 'factor="%.2f" scaled="%.2f" ', + 'txt' => ' x %6.2f = %14.2f', + }, + ); + my $Style = "txt"; + $Style = "xml" if $opt_xml ; + $Style = "xml" if $opt_yaml; # not a typo; just set to anything but txt + $Style = "xml" if $opt_csv ; # not a typo; just set to anything but txt + + my $hyphen_line = sprintf "%s", '-' x (79 + $column_1_offset); + $hyphen_line = sprintf "%s", '-' x (68 + $column_1_offset) + if (!$opt_3) and (68 + $column_1_offset) > 79; + my $data_line = ""; + my $first_column; + my $BY_LANGUAGE = 0; + my $BY_FILE = 0; + if ($report_type eq "by language") { + $first_column = "Language"; + $BY_LANGUAGE = 1; + } elsif ($report_type eq "by file") { + $first_column = "File"; + $BY_FILE = 1; + } else { + $first_column = "Report File"; + } + + my $header_line = sprintf "%s v %4.2f", $URL, $version; + $header_line .= sprintf(" T=%.1f s (%.1f files/s, %.1f lines/s)", + $elapsed_sec , + $sum_files/$elapsed_sec, + $sum_lines/$elapsed_sec) unless $opt_sum_reports; + if ($opt_xml or $opt_yaml) { + if (!$ALREADY_SHOWED_HEADER) { + push @results, xml_or_yaml_header($URL, $version, $elapsed_sec, + $sum_files, $sum_lines); + $ALREADY_SHOWED_HEADER = 1; + } + if ($BY_FILE) { + push @results, ""; + } else { + push @results, ""; + } + } else { + push @results, output_header($header_line, $hyphen_line, $BY_FILE); + } + + if ($Style eq "txt") { + # column headers + if (!$opt_3 and $BY_FILE) { + my $spacing_n = $spacing_1 - 11; + $data_line = sprintf "%-${spacing_n}s ", $first_column; + } else { + $data_line = sprintf "%-${spacing_1}s ", $first_column; + } + if ($BY_FILE) { + $data_line .= sprintf "%${spacing_2}s " , " " ; + } else { + $data_line .= sprintf "%${spacing_2}s " , "files"; + } + $data_line .= sprintf "%${spacing_2}s %${spacing_2}s %${spacing_2}s", + "blank" , + "comment" , + "code"; + $data_line .= sprintf 
" %8s %14s", + "scale" , + "3rd gen. equiv" + if $opt_3; + push @results, $data_line; + push @results, $hyphen_line; + } + if ($opt_csv) { + my $header2; + if ($BY_FILE) { + $header2 = "language,filename"; + } else { + $header2 = "files,language"; + } + $header2 .= ",blank,comment,code"; + $header2 .= ",scale,3rd gen. equiv" if $opt_3; + $header2 .= ',"' . $header_line . '"'; + push @results, $header2; + } + + my $sum_scaled = 0; + foreach my $lang_or_file (sort { + $rhh_count->{$b}{'code'} <=> + $rhh_count->{$a}{'code'} + } + keys %{$rhh_count}) { + my ($factor, $scaled); + if ($BY_LANGUAGE or $BY_FILE) { + $factor = 1; + if ($BY_LANGUAGE) { + if (defined $rh_scale->{$lang_or_file}) { + $factor = $rh_scale->{$lang_or_file}; + } else { + warn "No scale factor for $lang_or_file; using 1.00"; + } + } else { # by individual code file + $factor = $rh_scale->{$rhh_count->{$lang_or_file}{'lang'}}; + } + $scaled = $factor*$rhh_count->{$lang_or_file}{'code'}; + } else { + if (!defined $rhh_count->{$lang_or_file}{'scaled'}) { + $opt_3 = 0; + # If we're summing together files previously generated + # with --no3 then rhh_count->{$lang_or_file}{'scaled'} + # this variable will be undefined. That should only + # happen when summing together by file however. 
+ } elsif ($BY_LANGUAGE) { + warn "Missing scaled language info for $lang_or_file\n"; + } + if ($opt_3) { + $scaled = $rhh_count->{$lang_or_file}{'scaled'}; + $factor = $scaled/$rhh_count->{$lang_or_file}{'code'}; + } + } + + if ($BY_FILE) { + $data_line = sprintf $Format{'1'}{$Style}, $lang_or_file; + } else { + $data_line = sprintf $Format{'2'}{$Style}, $lang_or_file; + } + $data_line .= sprintf $Format{3}{$Style} , + $rhh_count->{$lang_or_file}{'nFiles'} unless $BY_FILE; + $data_line .= sprintf $Format{4}{$Style} , + $rhh_count->{$lang_or_file}{'blank'} , + $rhh_count->{$lang_or_file}{'comment'}, + $rhh_count->{$lang_or_file}{'code'} ; + $data_line .= sprintf $Format{5}{$Style} , + $factor , + $scaled if $opt_3; + $sum_scaled += $scaled if $opt_3; + + if ($opt_xml) { + if (defined $rhh_count->{$lang_or_file}{'lang'}) { + my $lang = $rhh_count->{$lang_or_file}{'lang'}; + if (!defined $languages->{$lang}) { + $languages->{$lang} = $lang; + } + $data_line.=' language="' . $lang . '" '; + } + if ($BY_FILE) { + push @results, " "; + } else { + push @results, " "; + } + } elsif ($opt_yaml) { + push @results,$lang_or_file . ":"; + push @results," nFiles: " .$rhh_count->{$lang_or_file}{'nFiles'} + unless $BY_FILE; + push @results," blank: " .$rhh_count->{$lang_or_file}{'blank'} ; + push @results," comment: " .$rhh_count->{$lang_or_file}{'comment'}; + push @results," code: " .$rhh_count->{$lang_or_file}{'code'} ; + push @results," language: ".$rhh_count->{$lang_or_file}{'lang'} + if $BY_FILE; + if ($opt_3) { + push @results, " scaled: " . $scaled; + push @results, " factor: " . $factor; + } + } elsif ($opt_csv) { + my $extra_3 = ""; + $extra_3 = ",$factor,$scaled" if $opt_3; + my $str; + if ($BY_FILE) { + $str = $rhh_count->{$lang_or_file}{'lang'} . ","; + } else { + $str = $rhh_count->{$lang_or_file}{'nFiles'} . ","; + } + $str .= $lang_or_file . "," . + $rhh_count->{$lang_or_file}{'blank'} . "," . + $rhh_count->{$lang_or_file}{'comment'}. "," . 
+ $rhh_count->{$lang_or_file}{'code'} . + $extra_3; + push @results, $str; + } else { + push @results, $data_line; + } + } + + my $avg_scale = 1; # weighted average of scale factors + $avg_scale = sprintf("%.2f", $sum_scaled / $sum_code) + if $sum_code and $opt_3; + + if ($opt_xml) { + $data_line = ""; + if (!$BY_FILE) { + $data_line .= sprintf "sum_files=\"%d\" ", $sum_files; + } + $data_line .= sprintf $Format{'4'}{$Style}, + $sum_blank , + $sum_comment , + $sum_code ; + $data_line .= sprintf $Format{'5'}{$Style}, + $avg_scale , + $sum_scaled if $opt_3; + push @results, " "; + + if ($BY_FILE) { + push @results, ""; + } else { + foreach my $language (keys %{$languages}) { + push @results, ' '; + } + push @results, ""; + } + + if (!$opt_by_file_by_lang or $ALREADY_SHOWED_XML_SECTION) { + push @results, "
"; + } else { + $ALREADY_SHOWED_XML_SECTION = 1; + } + } elsif ($opt_yaml) { + push @results, "SUM:"; + push @results, " blank: " . $sum_blank ; + push @results, " code: " . $sum_code ; + push @results, " comment: ". $sum_comment; + push @results, " nFiles: " . $sum_files ; + if ($opt_3) { + push @results, " scaled: " . $sum_scaled; + push @results, " factor: " . $avg_scale ; + } + } elsif ($opt_csv) { + # do nothing + } else { + + if ($BY_FILE) { + $data_line = sprintf "%-${spacing_0}s ", "SUM:" ; + } else { + $data_line = sprintf "%-${spacing_1}s ", "SUM:" ; + $data_line .= sprintf "%${spacing_2}d ", $sum_files; + } + $data_line .= sprintf $Format{'4'}{$Style}, + $sum_blank , + $sum_comment , + $sum_code ; + $data_line .= sprintf $Format{'5'}{$Style}, + $avg_scale , + $sum_scaled if $opt_3; + push @results, $hyphen_line if $sum_files > 1 or $opt_sum_one; + push @results, $data_line if $sum_files > 1 or $opt_sum_one; + push @results, $hyphen_line; + } + write_xsl_file() if $opt_xsl and $opt_xsl eq $CLOC_XSL; + print "<- generate_report\n" if $opt_v > 2; + return @results; +} # 1}}} +sub print_errors { # {{{1 + my ($rh_Error_Codes, # in + $raa_errors , # in + ) = @_; + + print "-> print_errors\n" if $opt_v > 2; + my %error_string = reverse(%{$rh_Error_Codes}); + my $nErrors = scalar @{$raa_errors}; + warn sprintf "\n%d error%s:\n", plural_form(scalar @Errors); + for (my $i = 0; $i < $nErrors; $i++) { + warn sprintf "%s: %s\n", + $error_string{ $raa_errors->[$i][0] }, + $raa_errors->[$i][1] ; + } + print "<- print_errors\n" if $opt_v > 2; + +} # 1}}} +sub write_lang_def { # {{{1 + my ($file , + $rh_Language_by_Extension , # in + $rh_Language_by_Script , # in + $rh_Language_by_File , # in + $rhaa_Filters_by_Language , # in + $rh_Not_Code_Extension , # in + $rh_Not_Code_Filename , # in + $rh_Scale_Factor , # in + $rh_EOL_Continuation_re , # in + ) = @_; + + print "-> write_lang_def($file)\n" if $opt_v > 2; + my $OUT = new IO::File $file, "w"; + die "Unable to write to 
$file\n" unless defined $OUT; + + foreach my $language (sort keys %{$rhaa_Filters_by_Language}) { + next if $language eq "MATLAB/Objective C/MUMPS" or + $language eq "PHP/Pascal"; + printf $OUT "%s\n", $language; + foreach my $filter (@{$rhaa_Filters_by_Language->{$language}}) { + printf $OUT " filter %s", $filter->[0]; + printf $OUT " %s", $filter->[1] if defined $filter->[1]; + print $OUT "\n"; + } + foreach my $ext (sort keys %{$rh_Language_by_Extension}) { + if ($language eq $rh_Language_by_Extension->{$ext}) { + printf $OUT " extension %s\n", $ext; + } + } + foreach my $filename (sort keys %{$rh_Language_by_File}) { + if ($language eq $rh_Language_by_File->{$filename}) { + printf $OUT " filename %s\n", $filename; + } + } + foreach my $script_exe (sort keys %{$rh_Language_by_Script}) { + if ($language eq $rh_Language_by_Script->{$script_exe}) { + printf $OUT " script_exe %s\n", $script_exe; + } + } + printf $OUT " 3rd_gen_scale %.2f\n", $rh_Scale_Factor->{$language}; + if (defined $rh_EOL_Continuation_re->{$language}) { + printf $OUT " end_of_line_continuation %s\n", + $rh_EOL_Continuation_re->{$language}; + } + } + + $OUT->close; + print "<- write_lang_def\n" if $opt_v > 2; +} # 1}}} +sub read_lang_def { # {{{1 + my ($file , + $rh_Language_by_Extension , # out + $rh_Language_by_Script , # out + $rh_Language_by_File , # out + $rhaa_Filters_by_Language , # out + $rh_Not_Code_Extension , # out + $rh_Not_Code_Filename , # out + $rh_Scale_Factor , # out + $rh_EOL_Continuation_re , # out + $rh_EOL_abc, + ) = @_; + + + print "-> read_lang_def($file)\n" if $opt_v > 2; + my $IN = new IO::File $file, "r"; + die "Unable to read $file.\n" unless defined $IN; + + my $language = ""; + while (<$IN>) { + next if /^\s*#/ or /^\s*$/; + + if (/^(\w+.*?)\s*$/) { + $language = $1; + next; + } + die "Missing computer language name, line $. 
of $file\n" + unless $language; + + if (/^ filter\s+(\w+)\s*$/) { + push @{$rhaa_Filters_by_Language->{$language}}, [ $1 ] + + } elsif (/^ filter\s+(\w+)\s+(.*?)\s*$/) { + push @{$rhaa_Filters_by_Language->{$language}}, [ $1 , $2 ] + + } elsif (/^ extension\s+(\S+)\s*$/) { + if (defined $rh_Language_by_Extension->{$1}) { + die "File extension collision: $1 ", + "maps to languages '$rh_Language_by_Extension->{$1}' ", + "and '$language'\n" , + "Edit $file and remove $1 from one of these two ", + "language definitions.\n"; + } + $rh_Language_by_Extension->{$1} = $language; + + } elsif (/^ filename\s+(\S+)\s*$/) { + $rh_Language_by_File->{$1} = $language; + + } elsif (/^ script_exe\s+(\S+)\s*$/) { + $rh_Language_by_Script->{$1} = $language; + + } elsif (/^ 3rd_gen_scale\s+(\S+)\s*$/) { + $rh_Scale_Factor->{$language} = $1; + + } elsif (/^ end_of_line_continuation\s+(\S+)\s*$/) { + $rh_EOL_Continuation_re->{$language} = $1; + + } else { + die "Unexpected data line $. of $file:\n$_\n"; + } + + } + $IN->close; + print "<- read_lang_def\n" if $opt_v > 2; +} # 1}}} +sub print_extension_info { # {{{1 + my ($extension,) = @_; + if ($extension) { # show information on this extension + foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) { + # Language_by_Extension{f} = 'Fortran 77' + printf "%-12s -> %s\n", $ext, $Language_by_Extension{$ext} + if $ext =~ m{$extension}i; + } + } else { # show information on all extensions + foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) { + # Language_by_Extension{f} = 'Fortran 77' + printf "%-12s -> %s\n", $ext, $Language_by_Extension{$ext}; + } + } +} # 1}}} +sub print_language_info { # {{{1 + my ($language,) = @_; + my %extensions = (); # the subset matched by the given $language value + if ($language) { # show information on this language + foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) { + # Language_by_Extension{f} = 'Fortran 77' + push 
@{$extensions{$Language_by_Extension{$ext}} }, $ext + if $Language_by_Extension{$ext} =~ m{$language}i; + } + } else { # show information on all languages + foreach my $ext (sort {lc $a cmp lc $b } keys %Language_by_Extension) { + # Language_by_Extension{f} = 'Fortran 77' + push @{$extensions{$Language_by_Extension{$ext}} }, $ext + } + } + + # add exceptions (one file extension mapping to multiple languages) + if (!$language or + $language =~ /^(Objective C|MATLAB|MUMPS)$/i) { + push @{$extensions{'Objective C'}}, "m"; + push @{$extensions{'MATLAB'}} , "m"; + push @{$extensions{'MUMPS'}} , "m"; + delete $extensions{'MATLAB/Objective C/MUMPS'}; + } + + if (%extensions) { + foreach my $lang (sort {lc $a cmp lc $b } keys %extensions) { + printf "%-26s (%s)\n", $lang, join(", ", @{$extensions{$lang}}); + } + } +} # 1}}} +sub make_file_list { # {{{1 + my ($ra_arg_list, # in file and/or directory names to examine + $rh_Err , # in hash of error codes + $raa_errors , # out errors encountered + $rh_ignored , # out files not recognized as computer languages + ) = @_; + print "-> make_file_list(@{$ra_arg_list})\n" if $opt_v > 2; + + my ($fh, $filename); + if ($opt_categorized) { + $filename = $opt_categorized; + $fh = new IO::File $filename, "+>"; # open for read/write + die "Unable to write to $filename: $!\n" unless defined $fh; + } elsif ($opt_sdir) { + # write to the user-defined scratch directory + $filename = $opt_sdir . 
'/cloc_file_list.txt'; + $fh = new IO::File $filename, "+>"; # open for read/write + die "Unable to write to $filename: $!\n" unless defined $fh; + } else { + # let File::Temp create a suitable temporary file + ($fh, $filename) = tempfile(UNLINK => 1); # delete file on exit + print "Using temp file list [$filename]\n" if $opt_v; + } + + my @dir_list = (); + foreach my $file_or_dir (@{$ra_arg_list}) { +#print "make_file_list file_or_dir=$file_or_dir\n"; + my $size_in_bytes = 0; + if (!-r $file_or_dir) { + push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file_or_dir]; + next; + } + if (is_file($file_or_dir)) { + if (!(-s $file_or_dir)) { # 0 sized file, named pipe, socket + $rh_ignored->{$file_or_dir} = 'zero sized file'; + next; + } elsif (-B $file_or_dir and !$opt_read_binary_files) { + # avoid binary files unless user insists on reading them + if ($opt_unicode) { + # only ignore if not a Unicode file w/trivial + # ASCII transliteration + if (!unicode_file($file_or_dir)) { + $rh_ignored->{$file_or_dir} = 'binary file'; + next; + } + } else { + $rh_ignored->{$file_or_dir} = 'binary file'; + next; + } + } + push @file_list, "$file_or_dir"; + } elsif (is_dir($file_or_dir)) { + push @dir_list, $file_or_dir; + } else { + push @{$raa_errors}, [$rh_Err->{'Neither file nor directory'} , $file_or_dir]; + $rh_ignored->{$file_or_dir} = 'not file, not directory'; + } + } + foreach my $dir (@dir_list) { +#print "make_file_list dir=$dir\n"; + # populates global variable @file_list + find({wanted => \&files, follow => $opt_follow_links }, $dir); + } + $nFiles_Found = scalar @file_list; + printf "%8d text file%s.\n", plural_form($nFiles_Found) unless $opt_quiet; + write_file($opt_found, sort @file_list) if $opt_found; + + my $nFiles_Categorized = 0; + foreach my $file (@file_list) { + printf "classifying $file\n" if $opt_v > 2; + + my $basename = basename $file; + if ($Not_Code_Filename{$basename}) { + $rh_ignored->{$file} = "listed in " . '$' . 
+ "Not_Code_Filename{$basename}"; + next; + } elsif ($basename =~ m{~$}) { + $rh_ignored->{$file} = "temporary editor file"; + next; + } + + my $size_in_bytes = (stat $file)[7]; + my $language = ""; + if ($All_One_Language) { + # user over-rode auto-language detection by using + # --force-lang with just a language name (no extension) + $language = $All_One_Language; + } else { + $language = classify_file($file , + $rh_Err , + $raa_errors, + $rh_ignored); + } +die "make_file_list($file) undef size" unless defined $size_in_bytes; +die "make_file_list($file) undef lang" unless defined $language; + printf $fh "%d,%s,%s\n", $size_in_bytes, $language, $file; + ++$nFiles_Categorized; + #printf "classified %d files\n", $nFiles_Categorized + # unless (!$opt_progress_rate or + # ($nFiles_Categorized % $opt_progress_rate)); + } + printf "classified %d files\r", $nFiles_Categorized + if !$opt_quiet and $nFiles_Categorized > 1; + + print "<- make_file_list()\n" if $opt_v > 2; + + return $fh; # handle to the file containing the list of files to process +} # 1}}} +sub remove_duplicate_files { # {{{1 + my ($fh , # in + $rh_Language , # out + $rh_unique_source_file, # out + $rh_Err , # in + $raa_errors , # out errors encountered + $rh_ignored , # out + ) = @_; + + # Check for duplicate files by comparing file sizes. + # Where files are equally sized, compare their MD5 checksums. 
+ print "-> remove_duplicate_files\n" if $opt_v > 2; + + my $n = 0; + my %files_by_size = (); # files_by_size{ # bytes } = [ list of files ] + seek($fh, 0, 0); # rewind to beginning of the temp file + while (<$fh>) { + ++$n; + my ($size_in_bytes, $language, $file) = split(/,/, $_, 3); + chomp($file); + $rh_Language->{$file} = $language; + push @{$files_by_size{$size_in_bytes}}, $file; + if ($opt_skip_uniqueness) { + $rh_unique_source_file->{$file} = 1; + } + } + return if $opt_skip_uniqueness; + if ($opt_progress_rate and ($n > $opt_progress_rate)) { + printf "Duplicate file check %d files (%d known unique)\r", + $n, scalar keys %files_by_size; + } + $n = 0; + foreach my $bytes (sort {$a <=> $b} keys %files_by_size) { + ++$n; + printf "Unique: %8d files \r", + $n unless (!$opt_progress_rate or ($n % $opt_progress_rate)); + if (scalar @{$files_by_size{$bytes}} == 1) { + # only one file is this big; must be unique + $rh_unique_source_file->{$files_by_size{$bytes}[0]} = 1; + next; + } else { +#print "equally sized files: ",join(", ", @{$files_by_size{$bytes}}), "\n"; + foreach my $F (different_files(\@{$files_by_size{$bytes}}, + $rh_Err , + $raa_errors , + $rh_ignored ) ) { + $rh_unique_source_file->{$F} = 1; + } + } + } + print "<- remove_duplicate_files\n" if $opt_v > 2; +} # 1}}} +sub files { # {{{1 + # invoked by File::Find's find() Populates global variable @file_list + if ($opt_exclude_dir or $opt_exclude_list_file) { + my $return = 0; + foreach my $skip_dir (keys %Exclude_Dir) { + # File::Find::dir used to always start with / but + # newer versions (1.13) no longer do; have to correct for this + my $dir = $File::Find::dir; + $dir = "./$dir" unless $dir =~ m{^/}; + if ($dir =~ m{/\Q$skip_dir\E(/|$)} ) { + $Ignored{$File::Find::name} = "--exclude-dir=$skip_dir"; + $return = 1; + last; + } + } + return if $return; + } + my $Dir = cwd(); # not $File::Find::dir which just gives relative path + if ($opt_match_f ) { return unless /$opt_match_f/; } + if 
($opt_not_match_f) { return if /$opt_not_match_f/; } + if ($opt_match_d ) { return unless $Dir =~ m{$opt_match_d} } + if ($opt_not_match_d) { return if $Dir =~ m{$opt_not_match_d} } + + my $nBytes = -s $_ ; + if (!$nBytes and $opt_v > 5) { + printf "files(%s) zero size\n", $File::Find::name; + } + return unless $nBytes ; # attempting other tests w/pipe or socket will hang + my $is_dir = is_dir($_); + my $is_bin = -B $_ ; + printf "files(%s) size=%d is_dir=%d -B=%d\n", + $File::Find::name, $nBytes, $is_dir, $is_bin if $opt_v > 5; + $is_bin = 0 if $opt_unicode and unicode_file($_); + $is_bin = 0 if $opt_read_binary_files; + return if $is_dir or $is_bin; + ++$nFiles_Found; + printf "%8d files\r", $nFiles_Found + unless (!$opt_progress_rate or ($nFiles_Found % $opt_progress_rate)); + push @file_list, $File::Find::name; +} # 1}}} +sub archive_files { # {{{1 + # invoked by File::Find's find() Populates global variable @binary_archive + foreach my $ext (keys %Known_Binary_Archives) { + push @binary_archive, $File::Find::name + if $File::Find::name =~ m{$ext$}; + } +} # 1}}} +sub is_file { # {{{1 + # portable method to test if item is a file + # (-f doesn't work in ActiveState Perl on Windows) + my $item = shift @_; + + if ($ON_WINDOWS) { + my $mode = (stat $item)[2]; + $mode = 0 unless $mode; + if ($mode & 0100000) { return 1; } + else { return 0; } + } else { + return (-f $item); # works on Unix, Linux, CygWin, z/OS + } +} # 1}}} +sub is_dir { # {{{1 + # portable method to test if item is a directory + # (-d doesn't work in ActiveState Perl on Windows) + my $item = shift @_; + + if ($ON_WINDOWS) { + my $mode = (stat $item)[2]; + $mode = 0 unless $mode; + if ($mode & 0040000) { return 1; } + else { return 0; } + } else { + return (-d $item); # works on Unix, Linux, CygWin, z/OS + } +} # 1}}} +sub is_excluded { # {{{1 + my ($file , # in + $excluded , # in hash of excluded directories + ) = @_; + my($filename, $filepath, $suffix) = fileparse($file); + foreach my $path (sort 
keys %{$excluded}) { + return 1 if ($filepath =~ m{^$path/}i); + } +} # 1}}} +sub classify_file { # {{{1 + my ($full_file , # in + $rh_Err , # in hash of error codes + $raa_errors , # out + $rh_ignored , # out + ) = @_; + + print "-> classify_file($full_file)\n" if $opt_v > 2; + my $language = "(unknown)"; + + my $look_at_first_line = 0; + my $file = basename $full_file; + if ($opt_autoconf and $file =~ /\.in$/) { + $file =~ s/\.in$//; + } + return $language if $Not_Code_Filename{$file}; # (unknown) + return $language if $file =~ m{~$}; # a temp edit file (unknown) + if (defined $Language_by_File{$file}) { + return $Language_by_File{$file}; + } + + if ($file =~ /\.(\w+)$/) { # has an extension + print "$full_file extension=[$1]\n" if $opt_v > 2; + my $extension = $1; + # Windows file names are case insensitive so map + # all extensions to lowercase there. + $extension = lc $extension if $ON_WINDOWS; + my @extension_list = ( $extension ); + if ($file =~ /\.(\w+\.\w+)$/) { # has a double extension + my $extension = $1; + $extension = lc $extension if $ON_WINDOWS; + unshift @extension_list, $extension; # examine double ext first + } + foreach my $extension (@extension_list) { + if ($Not_Code_Extension{$extension} and + !$Forced_Extension{$extension}) { + # If .1 (for example) is an extention that would ordinarily be + # ignored but the user has insisted this be counted with the + # --force-lang option, then go ahead and count it. + $rh_ignored->{$full_file} = + 'listed in $Not_Code_Extension{' . $extension . 
'}'; + return $language; + } + if (defined $Language_by_Extension{$extension}) { + if ($Language_by_Extension{$extension} eq + 'MATLAB/Objective C/MUMPS') { + my $lang_M_or_O = ""; + matlab_or_objective_C($full_file , + $rh_Err , + $raa_errors, + \$lang_M_or_O); + if ($lang_M_or_O) { + return $lang_M_or_O; + } else { # an error happened in matlab_or_objective_C() + $rh_ignored->{$full_file} = + 'failure in matlab_or_objective_C()'; + return $language; # (unknown) + } + } elsif ($Language_by_Extension{$extension} eq 'PHP/Pascal') { + if (really_is_php($full_file)) { + return 'PHP'; + } elsif (really_is_incpascal($full_file)) { + return 'Pascal'; + } else { + return $language; # (unknown) + } + } elsif ($Language_by_Extension{$extension} eq 'Smarty') { + # Smarty extension .tpl is generic; make sure the + # file at least roughly resembles PHP + if (really_is_php($full_file)) { + return 'Smarty'; + } else { + return $language; # (unknown) + } + } else { + return $Language_by_Extension{$extension}; + } + } else { # has an unmapped file extension + $look_at_first_line = 1; + } + } + } elsif (defined $Language_by_File{lc $file}) { + return $Language_by_File{lc $file}; + } elsif ($opt_lang_no_ext and + defined $Filters_by_Language{$opt_lang_no_ext}) { + return $opt_lang_no_ext; + } else { # no file extension + $look_at_first_line = 1; + } + + if ($look_at_first_line) { + # maybe it is a shell/Perl/Python/Ruby/etc script that + # starts with pound bang: + # #!/usr/bin/perl + # #!/usr/bin/env perl + my $script_language = peek_at_first_line($full_file , + $rh_Err , + $raa_errors); + if (!$script_language) { + $rh_ignored->{$full_file} = "language unknown (#2)"; + # returns (unknown) + } + if (defined $Language_by_Script{$script_language}) { + if (defined $Filters_by_Language{ + $Language_by_Script{$script_language}}) { + $language = $Language_by_Script{$script_language}; + } else { + $rh_ignored->{$full_file} = + "undefined: Filters_by_Language{" . 
+ $Language_by_Script{$script_language} . + "} for scripting language $script_language"; + # returns (unknown) + } + } else { + $rh_ignored->{$full_file} = "language unknown (#3)"; + # returns (unknown) + } + } + print "<- classify_file($full_file)\n" if $opt_v > 2; + return $language; +} # 1}}} +sub peek_at_first_line { # {{{1 + my ($file , # in + $rh_Err , # in hash of error codes + $raa_errors , # out + ) = @_; + + print "-> peek_at_first_line($file)\n" if $opt_v > 2; + + my $script_language = ""; + if (!-r $file) { + push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file]; + return $script_language; + } + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file]; + print "<- peek_at_first_line($file)\n" if $opt_v > 2; + return $script_language; + } + chomp(my $first_line = <$IN>); + if (defined $first_line) { +#print "peek_at_first_line of [$file] first_line=[$first_line]\n"; + if ($first_line =~ /^#\!\s*(\S.*?)$/) { +#print "peek_at_first_line 1=[$1]\n"; + my @pound_bang = split(' ', $1); +#print "peek_at_first_line basename 0=[", basename($pound_bang[0]), "]\n"; + if (basename($pound_bang[0]) eq "env" and + scalar @pound_bang > 1) { + $script_language = $pound_bang[1]; +#print "peek_at_first_line pound_bang A $pound_bang[1]\n"; + } else { + $script_language = basename $pound_bang[0]; +#print "peek_at_first_line pound_bang B $script_language\n"; + } + } + } + $IN->close; + print "<- peek_at_first_line($file)\n" if $opt_v > 2; + return $script_language; +} # 1}}} +sub different_files { # {{{1 + # See which of the given files are unique by computing each file's MD5 + # sum. Return the subset of files which are unique. + my ($ra_files , # in + $rh_Err , # in + $raa_errors , # out + $rh_ignored , # out + ) = @_; + + print "-> different_files(@{$ra_files})\n" if $opt_v > 2; + my %file_hash = (); # file_hash{md5 hash} = [ file1, file2, ... 
]
    foreach my $F (@{$ra_files}) {
        next if is_dir($F);  # needed for Windows
        my $IN = new IO::File $F, "r";
        if (!defined $IN) {
            push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $F];
            $rh_ignored->{$F} = 'cannot read';
        } else {
            if ($HAVE_Digest_MD5) {
                # Key by content hash so byte-identical files collide.
                binmode $IN;
                my $MD5 = Digest::MD5->new->addfile($IN)->hexdigest;
                push @{$file_hash{$MD5}}, $F;
            } else {
                # No Digest::MD5 available: key by file name, so all
                # files are treated unique.
                push @{$file_hash{$F}}, $F;
            }
            $IN->close;
        }
    }

    # Loop over file sets having identical MD5 sums.  Within
    # each set, pick the file that most resembles known source
    # code.
    my @unique = ();
    for my $md5 (sort keys %file_hash) {
        my $i_best = 0;
        for (my $i = 1; $i < scalar(@{$file_hash{$md5}}); $i++) {
            my $F = $file_hash{$md5}[$i];
            my (@nul_a, %nul_h);
            my $language = classify_file($F, $rh_Err,
                                         # don't save these errors; pointless
                                         \@nul_a, \%nul_h);
            # Last classifiable duplicate wins; index 0 if none classify.
            $i_best = $i if $language ne "(unknown)";
        }
        push @unique, $file_hash{$md5}[$i_best];
    }
    print "<- different_files(@unique)\n" if $opt_v > 2;
    return @unique;
} # 1}}}
sub call_counter {                           # {{{1
    # Count one file in the given language.  Returns the triple
    # ($total_lines, $blank_lines, $comment_lines); code lines are
    # what the caller derives as total - blank - comment.
    my ($file     , # in
        $language , # in
        $ra_Errors, # out
       ) = @_;

    # Logic: pass the file through the following filters:
    #   1. remove blank lines
    #   2. remove comments using each filter defined for this language
    #      (example:  SQL has two, remove_starts_with(--) and
    #       remove_c_comments() )
    #   3. compute comment lines as
    #        total lines - blank lines - lines left over after all
    #        comment filters have been applied

    print "-> call_counter($file, $language)\n" if $opt_v > 2;

    my @lines = ();
    my $ascii = "";
    if (-B $file and $opt_unicode) {
        # was binary so must be unicode

        $/ = undef;                          # slurp mode
        my $IN = new IO::File $file, "r";
        my $bin_text = <$IN>;
        $IN->close;
        $/ = "\n";                           # restore line mode

        $ascii = unicode_to_ascii( $bin_text );
        @lines = split("\n", $ascii );
        foreach (@lines) { $_ = "$_\n"; }

    } else {
        # regular text file
        @lines = read_file($file);
        $ascii = join('', @lines);
    }

    my @original_lines = @lines;
    my $total_lines    = scalar @lines;

    print_lines($file, "Original file:", \@lines) if $opt_print_filter_stages;
    @lines = rm_blanks(\@lines, $language, \%EOL_Continuation_re); # remove blank lines
    my $blank_lines = $total_lines - scalar @lines;
    print_lines($file, "Blank lines removed:", \@lines)
        if $opt_print_filter_stages;

    @lines = rm_comments(\@lines, $language, $file,
                         \%EOL_Continuation_re);

    my $comment_lines = $total_lines - $blank_lines - scalar @lines;
    if ($opt_strip_comments) {
        # Write the comment-free copy of the file; with --original-dir
        # it sits next to the original, otherwise in the cwd.
        my $stripped_file = "";
        if ($opt_original_dir) {
            $stripped_file =          $file . ".$opt_strip_comments";
        } else {
            $stripped_file = basename $file . ".$opt_strip_comments";
        }
        write_file($stripped_file, @lines);
    }
    if ($opt_html and !$opt_diff) {
        chomp(@original_lines);  # includes blank lines, comments
        chomp(@lines);           # no blank lines, no comments

        my (@diff_L, @diff_R, %count);

        # remove blank lines to get better quality diffs; count
        # blank lines separately
        my @original_lines_minus_white = ();
        # however must keep track of how many blank lines were removed and
        # where they were removed so that the HTML display can include it
        my %blank_line  = ();
        my $insert_line = 0;
        foreach (@original_lines) {
            if (/^\s*$/) {
                ++$count{blank}{same};
                ++$blank_line{ $insert_line };
            } else {
                ++$insert_line;
                push @original_lines_minus_white, $_;
            }
        }

        array_diff( $file                        , # in
                   \@original_lines_minus_white  , # in
                   \@lines                       , # in
                    "comment"                    , # in
                   \@diff_L, \@diff_R, ,           # out
                    $ra_Errors);                   # in/out
        write_comments_to_html($file, \@diff_L, \@diff_R, \%blank_line);
    }

    print "<- call_counter($total_lines, $blank_lines, $comment_lines)\n"
        if $opt_v > 2;
    return ($total_lines, $blank_lines, $comment_lines);
} # 1}}}
sub windows_glob {                           # {{{1
    # Windows doesn't expand wildcards.  Use code from Sean M. Burke's
    # Win32::Autoglob module to do this.
    return map {;
                ( defined($_) and m/[\*\?]/ ) ? sort(glob($_)) : $_
               } @_;
} # 1}}}
sub write_file {                             # {{{1
    # Write @lines to $file (after ~/wildcard expansion of the name),
    # creating the destination directory if needed.  Warns on failure.
    my ($file  , # in
        @lines , # in
       ) = @_;

    # Do ~ expansion (by Tim LaBerge, fixes bug 2787984)
    my $preglob_filename = $file;
    if ($ON_WINDOWS) {
        $file = (windows_glob($file))[0];
    } else {
        $file = glob($file);  # sometimes returns null string
    }
    $file = $preglob_filename unless $file;

    print "-> write_file($file)\n" if $opt_v > 2;

    # Create the destination directory if it doesn't already exist.
# (continuation of write_file: build the destination path)
    my $abs_file_path = File::Spec->rel2abs( $file );
    my ($volume, $directories, $filename) = File::Spec->splitpath( $abs_file_path );
    mkpath($volume . $directories, 1, 0777);

    my $OUT = new IO::File $file, "w";
    if (defined $OUT) {
        chomp(@lines);
        print $OUT join("\n", @lines), "\n";
        $OUT->close;
    } else {
        warn "Unable to write to $file\n";
    }
    print "Wrote $file";
    print ", $CLOC_XSL" if $opt_xsl and $opt_xsl eq $CLOC_XSL;
    print "\n";

    print "<- write_file\n" if $opt_v > 2;
} # 1}}}
sub read_file {                              # {{{1
    # Slurp $file into a list of newline-terminated lines.
    # Returns the empty list (with a warning) if the file is unreadable.
    my ($file, ) = @_;

    print "-> read_file($file)\n" if $opt_v > 2;
    my @lines = ();
    my $IN = new IO::File $file, "r";
    if (defined $IN) {
        @lines = <$IN>;
        $IN->close;
        # Some files don't end with a new line.  Force this.
        # Bug fix: guard against empty files -- assigning to
        # $lines[$#lines] when @lines is empty ($#lines == -1) is a
        # fatal "Modification of non-creatable array value" error.
        $lines[$#lines] .= "\n"
            if @lines and $lines[$#lines] !~ m/\n$/;
    } else {
        warn "Unable to read $file\n";
    }
    print "<- read_file\n" if $opt_v > 2;
    return @lines;
} # 1}}}
sub rm_blanks {                              # {{{1
    # Return the input lines with blank lines removed, honoring
    # end-of-line continuation rules for languages that define them.
    my ($ra_in    ,
        $language ,
        $rh_EOL_continuation_re) = @_;
    print "-> rm_blanks(language=$language)\n" if $opt_v > 2;
    my @out = ();
    if ($language eq "COBOL") {
        @out = remove_cobol_blanks($ra_in);
    } else {
        # removes blank lines
        if (defined $rh_EOL_continuation_re->{$language}) {
            @out = remove_matches_2re($ra_in, '^\s*$',
                                      $rh_EOL_continuation_re->{$language});
        } else {
            @out = remove_matches($ra_in, '^\s*$');
        }
    }
    print "<- rm_blanks(language=$language)\n" if $opt_v > 2;
    return @out;
} # 1}}}
sub rm_comments {                            # {{{1
    # Apply every comment-removal filter registered for $language to
    # the given (already blank-free) lines; returns the survivors.
    my ($ra_lines , # in, must be free of blank lines
        $language , # in
        $file     , # in (some language counters, eg Haskell, need
                    #     access to the original file)
        $rh_EOL_continuation_re , # in
       ) = @_;
    print "-> rm_comments(file=$file)\n" if $opt_v > 2;
    my @routines       = @{$Filters_by_Language{$language}};
    my @lines          = @{$ra_lines};
    my @original_lines = @{$ra_lines};

    foreach my $call_string (@routines) {
        my $subroutine = $call_string->[0];
        if (! defined &{$subroutine}) {
            warn "rm_comments undefined subroutine $subroutine for $file\n";
            next;
        }
        print "rm_comments file=$file sub=$subroutine\n" if $opt_v > 1;
        my @args = @{$call_string};
        shift @args; # drop the subroutine name
        if (@args and $args[0] eq '>filename<') {
            # Filter wants the file name itself as its first argument.
            shift   @args;
            unshift @args, $file;
        }

        no strict 'refs';
        @lines = &{$subroutine}(\@lines, @args);   # apply filter...

        print_lines($file, "After $subroutine(@args)", \@lines)
            if $opt_print_filter_stages;
        # then remove blank lines which are created by comment removal
        if (defined $rh_EOL_continuation_re->{$language}) {
            # Bug fix: pass this language's continuation regex (a
            # pattern string), not the whole hash reference -- the ref
            # would stringify to HASH(0x...) and never match, unlike
            # the correct usage in rm_blanks() above.
            @lines = remove_matches_2re(\@lines, '^\s*$',
                                        $rh_EOL_continuation_re->{$language});
        } else {
            @lines = remove_matches(\@lines, '^\s*$');
        }

        print_lines($file, "post $subroutine(@args) blank cleanup:", \@lines)
            if $opt_print_filter_stages;
    }
    # Exception for scripting languages:  treat the first #! line as code.
    # Will need to add it back in if it was removed earlier.
    if ($Script_Language{$language} and
        $original_lines[0] =~ /^#!/ and
        (scalar(@lines) == 0 or
         $lines[0] ne $original_lines[0])) {
        unshift @lines, $original_lines[0];  # add the first line back
    }
    print "<- rm_comments\n" if $opt_v > 2;
    return @lines;
} # 1}}}
sub remove_f77_comments {                    # {{{1
    # Drop fixed-form Fortran comment lines: 'c'/'C'/'*' in column 1,
    # plus '!'-style comment lines.
    my ($ra_lines, ) = @_;
    print "-> remove_f77_comments\n" if $opt_v > 2;

    my @save_lines = ();
    foreach (@{$ra_lines}) {
        next if m{^[*cC]};
        next if m{^\s*!};
        push @save_lines, $_;
    }

    print "<- remove_f77_comments\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub remove_f90_comments {                    # {{{1
    # derived from SLOCCount
    my ($ra_lines, ) = @_;
    print "-> remove_f90_comments\n" if $opt_v > 2;

    my @save_lines = ();
    foreach (@{$ra_lines}) {
        # a comment is             m/^\s*!/
        # an empty line is         m/^\s*$/
        # a HPF statement is       m/^\s*!hpf\$/i
        # an Open MP statement is  m/^\s*!omp\$/i
        if (!
# (continuation of the remove_f90_comments `if (!` condition above)
m/^(\s*!|\s*$)/ || m/^\s*!(hpf|omp)\$/i) {
            push @save_lines, $_;
        }
    }

    print "<- remove_f90_comments\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub remove_matches {                         # {{{1
    # Return only the lines that do NOT match the given pattern
    # (matching is case insensitive).
    my ($ra_lines, # in
        $pattern , # in   Perl regular expression (case insensitive)
       ) = @_;
    print "-> remove_matches(pattern=$pattern)\n" if $opt_v > 2;

    my @kept = grep { !m{$pattern}i } @{$ra_lines};

    print "<- remove_matches\n" if $opt_v > 2;
    return @kept;
} # 1}}}
sub remove_matches_2re {                     # {{{1
    # Drop lines matching $pattern1, except when the PREVIOUS input
    # line matches $pattern2 (used for end-of-line continuations:
    # a "blank" line continuing a previous statement must survive).
    my ($ra_lines, $pattern1, $pattern2, ) = @_;
    print "-> remove_matches_2re(pattern=$pattern1,$pattern2)\n" if $opt_v > 2;

    my @kept = ();
    foreach my $idx (0 .. $#{$ra_lines}) {
        my $this_line = $ra_lines->[$idx];
        if ($idx == 0) {
            # First line has no predecessor; drop it if it matches.
            next if $this_line =~ m{$pattern1}i;
        } else {
            next if  $this_line          =~ m{$pattern1}i
                and  $ra_lines->[$idx-1] !~ m{$pattern2}i;
        }
        push @kept, $this_line;
    }

    print "<- remove_matches_2re\n" if $opt_v > 2;
    return @kept;
} # 1}}}
sub remove_inline {                          # {{{1
    # Strip trailing (inline) comments matching $pattern from each
    # line.  NB: mutates the caller's lines in place via the foreach
    # alias, exactly as the original implementation did.
    my ($ra_lines, # in
        $pattern , # in   Perl regular expression (case insensitive)
       ) = @_;
    print "-> remove_inline(pattern=$pattern)\n" if $opt_v > 2;

    # Inline stripping is only active with --inline; pass through otherwise.
    return @{$ra_lines} unless $opt_inline;

    my $nLines_affected = 0;
    my @result = ();
    foreach my $line (@{$ra_lines}) {        # $line aliases the element
        if ($line =~ m{$pattern}i) {
            ++$nLines_affected;
            $line =~ s{$pattern}{}i;
        }
        push @result, $line;
    }

    print "<- remove_inline\n" if $opt_v > 2;
    return @result;
} # 1}}}
sub remove_above {                           # {{{1
    # Remove everything before the first line matching $marker (the
    # marker line itself is kept).  No marker => keep everything.
    my ($ra_lines, $marker, ) = @_;
    print "-> remove_above(marker=$marker)\n" if $opt_v > 2;

    # Pass 1: locate the marker (1-based line number; 0 = not found).
    my $found_marker = 0;
    foreach my $idx (0 .. $#{$ra_lines}) {
        if ($ra_lines->[$idx] =~ m{$marker}) {
            $found_marker = $idx + 1;
            last;
        }
    }

    # Pass 2: slice from the marker onward, or keep all lines.
    my @save_lines = $found_marker
                   ? @{$ra_lines}[$found_marker-1 .. $#{$ra_lines}]
                   : @{$ra_lines};

    print "<- remove_above\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub remove_below {                           # {{{1
    # Remove the first line matching $marker and everything after it.
    my ($ra_lines, $marker, ) = @_;
    print "-> remove_below(marker=$marker)\n" if $opt_v > 2;

    my @kept = ();
    foreach my $line (@{$ra_lines}) {
        last if $line =~ m{$marker};
        push @kept, $line;
    }

    print "<- remove_below\n" if $opt_v > 2;
    return @kept;
} # 1}}}
sub remove_below_above {                     # {{{1
    # Delete lines delimited by start and end line markers (markers
    # included), such as Perl POD documentation.
    my ($ra_lines, $marker_below, $marker_above, ) = @_;
    print "-> remove_below_above(markerB=$marker_below, A=$marker_above)\n"
        if $opt_v > 2;

    my @kept     = ();
    my $skipping = 0;
    foreach my $line (@{$ra_lines}) {
        if (!$skipping and $line =~ m{$marker_below}) { $skipping = 1; next; }
        if ( $skipping and $line =~ m{$marker_above}) { $skipping = 0; next; }
        push @kept, $line unless $skipping;
    }

    print "<- remove_below_above\n" if $opt_v > 2;
    return @kept;
} # 1}}}
sub remove_between {                         # {{{1
    my ($ra_lines, $marker, ) = @_;
    # $marker must
# contain one of the balanced pairs understood
    # by Regexp::Common::balanced, namely
    # '{}'  '()'  '[]'  or  '<>'

    print "-> remove_between(marker=$marker)\n" if $opt_v > 2;
    my %acceptable = ('{}'=>1, '()'=>1, '[]'=>1, '<>'=>1, );
    die "remove_between: invalid delimiter '$marker'\n",
        "the delimiter must be one of these four pairs:\n",
        "{}  ()  []  <>\n" unless
        $acceptable{$marker};

    # Lazily install Regexp::Common on first use.
    # (NB: $HAVE_Rexexp_Common is the spelling used file-wide.)
    Install_Regexp_Common() unless $HAVE_Rexexp_Common;

    my $all_lines = join("", @{$ra_lines});

    no strict 'vars';
    # otherwise get:
    #  Global symbol "%RE" requires explicit package name at cloc line xx.
    if ($all_lines =~ m/$RE{balanced}{-parens => $marker}/) {
        no warnings;
        $all_lines =~ s/$1//g;   # excise the balanced-delimiter span
    }

    print "<- remove_between\n" if $opt_v > 2;
    return split("\n", $all_lines);
} # 1}}}
sub remove_cobol_blanks {                    # {{{1
    # subroutines derived from SLOCCount
    # Remove lines COBOL treats as blank, honoring both fixed and
    # free source formats.
    my ($ra_lines, ) = @_;

    my $free_format = 0;  # Support "free format" source code.
    my @save_lines  = ();

    foreach (@{$ra_lines}) {
        next if m/^\s*$/;
        my $line = expand($_);  # convert tabs to equivalent spaces
        $free_format = 1 if $line =~ m/^......\$.*SET.*SOURCEFORMAT.*FREE/i;
        if ($free_format) {
            push @save_lines, $_;
        } else {
            # Greg Toth:
            #  (1) Treat lines with any alphanum in cols 1-6 and
            #      blanks in cols 7 through 71 as blank line, and
            #  (2) Treat lines with any alphanum in cols 1-6 and
            #      slash (/) in col 7 as blank line (this is a
            #      page eject directive).
            push @save_lines, $_ unless m/^\d{6}\s*$/             or
                                        ($line =~ m/^.{6}\s{66}/) or
                                        ($line =~ m/^......\//);
        }
    }
    return @save_lines;
} # 1}}}
sub remove_cobol_comments {                  # {{{1
    # subroutines derived from SLOCCount
    # Drop COBOL comment lines: '*' in column 7 (fixed format) or in
    # column 1 (free format).
    my ($ra_lines, ) = @_;

    my $free_format = 0;  # Support "free format" source code.
    my @save_lines  = ();

    foreach (@{$ra_lines}) {
        if (m/^......\$.*SET.*SOURCEFORMAT.*FREE/i) { $free_format = 1; }
        if ($free_format) {
            push @save_lines, $_ unless m{^\s*\*};
        } else {
            push @save_lines, $_ unless m{^......\*} or m{^\*};
        }
    }
    return @save_lines;
} # 1}}}
sub remove_jcl_comments {                    # {{{1
    # Drop JCL comment lines (//*) and everything after a bare //
    # end-of-job line; blank lines are skipped too.
    my ($ra_lines, ) = @_;

    print "-> remove_jcl_comments\n" if $opt_v > 2;

    my @save_lines = ();
    # (removed an unused $in_comment state variable)
    foreach (@{$ra_lines}) {
        next if /^\s*$/;
        next if m{^\s*//\*};
        last if m{^\s*//\s*$};
        push @save_lines, $_;
    }

    print "<- remove_jcl_comments\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub remove_jsp_comments {                    # {{{1
    # JSP comment is  <%--  body of comment  --%>
    my ($ra_lines, ) = @_;

    print "-> remove_jsp_comments\n" if $opt_v > 2;

    my @save_lines = ();
    my $in_comment = 0;
    foreach (@{$ra_lines}) {

        next if /^\s*$/;
        s/<\%\-\-.*?\-\-\%>//g;  # strip one-line comments
        next if /^\s*$/;
        if ($in_comment) {
            # inside a multi-line comment; look for its terminator
            if (/\-\-\%>/) {
                s/^.*?\-\-\%>//;
                $in_comment = 0;
            }
        }
        next if /^\s*$/;
        $in_comment = 1 if /^(.*?)<\%\-\-/;
        next if defined $1 and $1 =~ /^\s*$/;
        next if ($in_comment);
        push @save_lines, $_;
    }

    print "<- remove_jsp_comments\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub remove_html_comments {                   # {{{1
    # HTML comment is  <!--  body of comment  -->
    # Need to use my own routine until the HTML comment regex in
    # the Regexp::Common module can handle multi-line comments.
    # NOTE(review): the <!-- / --> delimiters were garbled in this
    # copy; reconstructed to mirror remove_jsp_comments above --
    # confirm against upstream cloc.
    my ($ra_lines, ) = @_;

    print "-> remove_html_comments\n" if $opt_v > 2;

    my @save_lines = ();
    my $in_comment = 0;
    foreach (@{$ra_lines}) {

        next if /^\s*$/;
        s/<!\-\-.*?\-\->//g;  # strip one-line comments
        next if /^\s*$/;
        if ($in_comment) {
            # inside a multi-line comment; look for its terminator
            if (/\-\->/) {
                s/^.*?\-\->//;
                $in_comment = 0;
            }
        }
        next if /^\s*$/;
        $in_comment = 1 if /^(.*?)<!\-\-/;
        next if defined $1 and $1 =~ /^\s*$/;
        next if ($in_comment);
        push @save_lines, $_;
    }

    print "<- remove_html_comments\n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub add_newlines {                           # {{{1
    # Append "\n" to every line (upstream filters may have produced
    # lines without terminators).
    my ($ra_lines, ) = @_;
    print "-> add_newlines \n" if $opt_v > 2;

    my @save_lines = ();
    foreach (@{$ra_lines}) {

        push @save_lines, "$_\n";
    }

    print "<- add_newlines \n" if $opt_v > 2;
    return @save_lines;
} # 1}}}
sub docstring_to_C {                         # {{{1
    my ($ra_lines, ) = @_;
    # Converts Python docstrings to C comments.
    # Alternating """ delimiters become /* and */ (in place) so the
    # C comment filter can remove them later in the chain.

    print "-> docstring_to_C()\n" if $opt_v > 2;

    my $in_docstring = 0;
    foreach (@{$ra_lines}) {
        while (/"""/) {
            if (!$in_docstring) {
                s{"""}{/*};
                $in_docstring = 1;
            } else {
                s{"""}{*/};
                $in_docstring = 0;
            }
        }
    }

    print "<- docstring_to_C\n" if $opt_v > 2;
    return @{$ra_lines};
} # 1}}}
sub smarty_to_C {                            # {{{1
    my ($ra_lines, ) = @_;
    # Converts Smarty comments, {* ... *}, to C comments (in place).

    print "-> smarty_to_C()\n" if $opt_v > 2;

    foreach (@{$ra_lines}) {
        s[{\*][/*]g;
        s[\*}][*/]g;
    }

    print "<- smarty_to_C\n" if $opt_v > 2;
    return @{$ra_lines};
} # 1}}}
sub determine_lit_type {                     # {{{1
    # Classify a literate Haskell file:
    #   2 = LaTeX style (\begin{code} blocks)
    #   1 = "bird track" style (lines starting with "> ")
    #   0 = neither found
    my ($file) = @_;

    open (FILE, $file);
    # Bug fix: the loop must read from the filehandle; a bare
    # "while ()" (the <FILE> had been lost) loops forever.
    while (<FILE>) {
        if (m/^\\begin{code}/) { close FILE; return 2; }
        if (m/^>\s/)           { close FILE; return 1; }
    }
    close FILE;   # also close the handle on fall-through

    return 0;
} # 1}}}
sub remove_haskell_comments {                # {{{1
    # Bulk of code taken from SLOCCount's haskell_count script.
    # Strips out {- .. -} and -- comments and counts the rest.
    # Pragmas, {-#...}, are counted as SLOC.
    # BUG: Doesn't handle strings with embedded block comment markers
    # gracefully.  In practice, that shouldn't be a problem.
+ my ($ra_lines, $file, ) = @_; + + print "-> remove_haskell_comments\n" if $opt_v > 2; + + my @save_lines = (); + my $in_comment = 0; + my $incomment = 0; + my ($literate, $inlitblock) = (0,0); + + $literate = 1 if $file =~ /\.lhs$/; + if($literate) { $literate = determine_lit_type($file) } + + foreach (@{$ra_lines}) { + if ($literate == 1) { + if (!s/^>//) { s/.*//; } + } elsif ($literate == 2) { + if ($inlitblock) { + if (m/^\\end{code}/) { s/.*//; $inlitblock = 0; } + } elsif (!$inlitblock) { + if (m/^\\begin{code}/) { s/.*//; $inlitblock = 1; } + else { s/.*//; } + } + } + + if ($incomment) { + if (m/\-\}/) { s/^.*?\-\}//; $incomment = 0;} + else { s/.*//; } + } + if (!$incomment) { + s/--.*//; + s!{-[^#].*?-}!!g; + if (m/{-/ && (!m/{-#/)) { + s/{-.*//; + $incomment = 1; + } + } + if (m/\S/) { push @save_lines, $_; } + } +# if ($incomment) {print "ERROR: ended in comment in $ARGV\n";} + + print "<- remove_haskell_comments\n" if $opt_v > 2; + return @save_lines; +} # 1}}} +sub print_lines { # {{{1 + my ($file , # in + $title , # in + $ra_lines , # in + ) = @_; + printf "->%-30s %s\n", $file, $title; + for (my $i = 0; $i < scalar @{$ra_lines}; $i++) { + printf "%5d | %s", $i+1, $ra_lines->[$i]; + print "\n" unless $ra_lines->[$i] =~ m{\n$} + } +} # 1}}} +sub set_constants { # {{{1 + my ($rh_Language_by_Extension , # out + $rh_Language_by_Script , # out + $rh_Language_by_File , # out + $rhaa_Filters_by_Language , # out + $rh_Not_Code_Extension , # out + $rh_Not_Code_Filename , # out + $rh_Scale_Factor , # out + $rh_Known_Binary_Archives , # out + $rh_EOL_continuation_re , # out + ) = @_; +# 1}}} +%{$rh_Language_by_Extension} = ( # {{{1 + 'abap' => 'ABAP' , + 'ac' => 'm4' , + 'ada' => 'Ada' , + 'adb' => 'Ada' , + 'ads' => 'Ada' , + 'adso' => 'ADSO/IDSM' , + 'am' => 'make' , + 'ample' => 'AMPLE' , + 'as' => 'ActionScript' , + 'dofile' => 'AMPLE' , + 'startup' => 'AMPLE' , + 'asa' => 'ASP' , + 'asax' => 'ASP.Net' , + 'ascx' => 'ASP.Net' , + 'asm' => 'Assembly' , + 
'asmx' => 'ASP.Net' , + 'asp' => 'ASP' , + 'aspx' => 'ASP.Net' , + 'master' => 'ASP.Net' , + 'sitemap' => 'ASP.Net' , + 'awk' => 'awk' , + 'bash' => 'Bourne Again Shell' , + 'bas' => 'Visual Basic' , + 'bat' => 'DOS Batch' , + 'BAT' => 'DOS Batch' , + 'cbl' => 'COBOL' , + 'CBL' => 'COBOL' , + 'c' => 'C' , + 'C' => 'C++' , + 'cc' => 'C++' , + 'ccs' => 'CCS' , + 'cfm' => 'ColdFusion' , + 'cl' => 'Lisp' , + 'cls' => 'Visual Basic' , + 'CMakeLists.txt' => 'CMake' , + 'cob' => 'COBOL' , + 'COB' => 'COBOL' , + 'config' => 'ASP.Net' , + 'cpp' => 'C++' , + 'cs' => 'C#' , + 'csh' => 'C Shell' , + 'css' => "CSS" , + 'cxx' => 'C++' , + 'd' => 'D' , + 'da' => 'DAL' , + 'dart' => 'Dart' , + 'def' => 'Teamcenter def' , + 'dmap' => 'NASTRAN DMAP' , + 'dpr' => 'Pascal' , + 'dtd' => 'DTD' , + 'ec' => 'C' , + 'el' => 'Lisp' , + 'erl' => 'Erlang' , + 'exp' => 'Expect' , + 'f77' => 'Fortran 77' , + 'F77' => 'Fortran 77' , + 'f90' => 'Fortran 90' , + 'F90' => 'Fortran 90' , + 'f95' => 'Fortran 95' , + 'F95' => 'Fortran 95' , + 'f' => 'Fortran 77' , + 'F' => 'Fortran 77' , + 'fmt' => 'Oracle Forms' , + 'focexec' => 'Focus' , + 'frm' => 'Visual Basic' , + 'gnumakefile' => 'make' , + 'Gnumakefile' => 'make' , + 'go' => 'Go' , + 'groovy' => 'Groovy' , + 'h' => 'C/C++ Header' , + 'H' => 'C/C++ Header' , + 'hh' => 'C/C++ Header' , + 'hpp' => 'C/C++ Header' , + 'hrl' => 'Erlang' , + 'hs' => 'Haskell' , + 'htm' => 'HTML' , + 'html' => 'HTML' , + 'i3' => 'Modula3' , + 'idl' => 'IDL' , + 'pro' => 'IDL' , + 'ig' => 'Modula3' , + 'il' => 'SKILL' , + 'ils' => 'SKILL++' , + 'inc' => 'PHP/Pascal' , # might be PHP or Pascal + 'itk' => 'Tcl/Tk' , + 'java' => 'Java' , + 'jcl' => 'JCL' , # IBM Job Control Lang. 
+ 'jl' => 'Lisp' , + 'js' => 'Javascript' , + 'jsp' => 'JSP' , # Java server pages + 'ksc' => 'Kermit' , + 'ksh' => 'Korn Shell' , + 'lhs' => 'Haskell' , + 'l' => 'lex' , + 'lsp' => 'Lisp' , + 'lisp' => 'Lisp' , + 'lua' => 'Lua' , + 'm3' => 'Modula3' , + 'm4' => 'm4' , + 'makefile' => 'make' , + 'Makefile' => 'make' , + 'met' => 'Teamcenter met' , + 'mg' => 'Modula3' , +# 'mli' => 'ML' , # ML not implemented +# 'ml' => 'ML' , + 'ml' => 'Ocaml' , + 'm' => 'MATLAB/Objective C/MUMPS' , + 'mm' => 'Objective C++' , + 'wdproj' => 'MSBuild scripts' , + 'csproj' => 'MSBuild scripts' , + 'mps' => 'MUMPS' , + 'mth' => 'Teamcenter mth' , + 'oscript' => 'LiveLink OScript' , + 'pad' => 'Ada' , # Oracle Ada preprocessor + 'pas' => 'Pascal' , + 'pcc' => 'C++' , # Oracle C++ preprocessor + 'perl' => 'Perl' , + 'pfo' => 'Fortran 77' , + 'pgc' => 'C' , # Postgres embedded C/C++ + 'php3' => 'PHP' , + 'php4' => 'PHP' , + 'php5' => 'PHP' , + 'php' => 'PHP' , + 'plh' => 'Perl' , + 'pl' => 'Perl' , + 'PL' => 'Perl' , + 'plx' => 'Perl' , + 'pm' => 'Perl' , + 'p' => 'Pascal' , + 'pp' => 'Pascal' , + 'psql' => 'SQL' , + 'py' => 'Python' , + 'pyx' => 'Cython' , + 'rb' => 'Ruby' , + # 'resx' => 'ASP.Net' , + 'rex' => 'Oracle Reports' , + 'rexx' => 'Rexx' , + 'rhtml' => 'Ruby HTML' , + 's' => 'Assembly' , + 'S' => 'Assembly' , + 'scala' => 'Scala' , + 'sbl' => 'Softbridge Basic' , + 'SBL' => 'Softbridge Basic' , + 'sc' => 'Lisp' , + 'scm' => 'Lisp' , + 'sed' => 'sed' , + 'ses' => 'Patran Command Language' , + 'pcl' => 'Patran Command Language' , + 'sh' => 'Bourne Shell' , + 'smarty' => 'Smarty' , + 'sql' => 'SQL' , + 'SQL' => 'SQL' , + 'sproc.sql' => 'SQL Stored Procedure' , + 'spoc.sql' => 'SQL Stored Procedure' , + 'spc.sql' => 'SQL Stored Procedure' , + 'udf.sql' => 'SQL Stored Procedure' , + 'data.sql' => 'SQL Data' , + 'tcl' => 'Tcl/Tk' , + 'tcsh' => 'C Shell' , + 'tk' => 'Tcl/Tk' , + 'tpl' => 'Smarty' , + 'vhd' => 'VHDL' , + 'VHD' => 'VHDL' , + 'vhdl' => 'VHDL' , + 'VHDL' => 'VHDL' , + 
'vba' => 'Visual Basic' , + 'VBA' => 'Visual Basic' , + # 'vbp' => 'Visual Basic' , # .vbp - autogenerated + 'vb' => 'Visual Basic' , + 'VB' => 'Visual Basic' , + # 'vbw' => 'Visual Basic' , # .vbw - autogenerated + 'vbs' => 'Visual Basic' , + 'VBS' => 'Visual Basic' , + 'webinfo' => 'ASP.Net' , + 'xml' => 'XML' , + 'XML' => 'XML' , + 'mxml' => 'MXML' , + 'build' => 'NAnt scripts' , + 'vim' => 'vim script' , + 'xaml' => 'XAML' , + 'xsd' => 'XSD' , + 'XSD' => 'XSD' , + 'xslt' => 'XSLT' , + 'XSLT' => 'XSLT' , + 'xsl' => 'XSLT' , + 'XSL' => 'XSLT' , + 'y' => 'yacc' , + 'yaml' => 'YAML' , + 'yml' => 'YAML' , + ); +# 1}}} +%{$rh_Language_by_Script} = ( # {{{1 + 'awk' => 'awk' , + 'bash' => 'Bourne Again Shell' , + 'bc' => 'bc' ,# calculator + 'csh' => 'C Shell' , + 'dmd' => 'D' , + 'idl' => 'IDL' , + 'kermit' => 'Kermit' , + 'ksh' => 'Korn Shell' , + 'lua' => 'Lua' , + 'make' => 'make' , + 'octave' => 'Octave' , + 'perl5' => 'Perl' , + 'perl' => 'Perl' , + 'ruby' => 'Ruby' , + 'sed' => 'sed' , + 'sh' => 'Bourne Shell' , + 'tcl' => 'Tcl/Tk' , + 'tclsh' => 'Tcl/Tk' , + 'tcsh' => 'C Shell' , + 'wish' => 'Tcl/Tk' , + ); +# 1}}} +%{$rh_Language_by_File} = ( # {{{1 + 'Makefile' => 'make' , + 'makefile' => 'make' , + 'gnumakefile' => 'make' , + 'Gnumakefile' => 'make' , + 'CMakeLists.txt' => 'CMake' , + ); +# 1}}} +%{$rhaa_Filters_by_Language} = ( # {{{1 + 'ABAP' => [ [ 'remove_matches' , '^\*' ], ], + 'ActionScript' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'call_regexp_common' , 'C' ], + ], + + 'ASP' => [ [ 'remove_matches' , '^\s*\47'], ], # \47 = ' + 'ASP.Net' => [ [ 'call_regexp_common' , 'C' ], ], + 'Ada' => [ [ 'remove_matches' , '^\s*--' ], ], + 'ADSO/IDSM' => [ [ 'remove_matches' , '^\s*\*[\+\!]' ], ], + 'AMPLE' => [ [ 'remove_matches' , '^\s*//' ], ], + 'Assembly' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'remove_matches' , '^\s*;' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '//.*$' ], + [ 'remove_inline' , ';.*$' ], + ], + 'awk' => [ + [ 
'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'bc' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'C' => [ + [ 'remove_matches' , '^\s*//' ], # C99 + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '//.*$' ], # C99 + ], + 'C++' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'remove_inline' , '//.*$' ], + [ 'call_regexp_common' , 'C' ], + ], + 'C/C++ Header' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '//.*$' ], + ], + 'CMake' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Cython' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'docstring_to_C' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '#.*$' ], + ], + 'C#' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '//.*$' ], + ], + 'CCS' => [ [ 'call_regexp_common' , 'C' ], ], + 'CSS' => [ [ 'call_regexp_common' , 'C' ], ], + 'COBOL' => [ [ 'remove_cobol_comments', ], ], + 'ColdFusion' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + 'Crystal Reports' => [ [ 'remove_matches' , '^\s*//' ], ], + 'D' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '//.*$' ], + ], + 'DAL' => [ [ 'remove_between' , '[]', ], ], + 'Dart' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'remove_inline' , '//.*$' ], + [ 'call_regexp_common' , 'C' ], + ], + 'NASTRAN DMAP' => [ + [ 'remove_matches' , '^\s*\$' ], + [ 'remove_inline' , '\$.*$' ], + ], + 'DOS Batch' => [ [ 'remove_matches' , '^\s*rem', ], ], + 'DTD' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + 'Erlang' => [ + [ 'remove_matches' , '^\s*%' ], + [ 'remove_inline' , '%.*$' ], + ], + 'Expect' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Focus' => [ [ 'remove_matches' , '^\s*\-\*' ], ], + 'Fortran 77' => [ + [ 'remove_f77_comments' , ], + [ 'remove_inline' , '\!.*$' 
], + ], + 'Fortran 90' => [ + [ 'remove_f77_comments' , ], + [ 'remove_f90_comments' , ], + [ 'remove_inline' , '\!.*$' ], + ], + 'Fortran 95' => [ + [ 'remove_f77_comments' , ], + [ 'remove_f90_comments' , ], + [ 'remove_inline' , '\!.*$' ], + ], + 'Go' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'remove_inline' , '//.*$' ], + [ 'call_regexp_common' , 'C' ], + ], + 'Groovy' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'remove_inline' , '//.*$' ], + [ 'call_regexp_common' , 'C' ], + ], + 'HTML' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + 'Haskell' => [ [ 'remove_haskell_comments', '>filename<' ], ], + 'IDL' => [ [ 'remove_matches' , '^\s*;' ], ], + 'JSP' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], + [ 'remove_jsp_comments' , ], + [ 'remove_matches' , '^\s*//' ], + [ 'add_newlines' , ], + [ 'call_regexp_common' , 'C' ], + ], + 'Java' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '//.*$' ], + ], + 'Javascript' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '//.*$' ], + ], + 'JCL' => [ [ 'remove_jcl_comments' , ], ], + 'Lisp' => [ [ 'remove_matches' , '^\s*;' ], ], + 'LiveLink OScript' => [ [ 'remove_matches' , '^\s*//' ], ], +# 'Lua' => [ [ 'call_regexp_common' , 'lua' ], ], + 'Lua' => [ [ 'remove_matches' , '^\s*\-\-' ], ], + 'make' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'MATLAB' => [ + [ 'remove_matches' , '^\s*%' ], + [ 'remove_inline' , '%.*$' ], + ], + 'Modula3' => [ [ 'call_regexp_common' , 'Pascal' ], ], + # Modula 3 comments are (* ... *) so applying the Pascal filter + # which also treats { ... } as a comment is not really correct. 
+ 'Objective C' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '//.*$' ], + ], + 'Objective C++' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '//.*$' ], + ], + 'Ocaml' => [ + [ 'call_regexp_common' , 'Pascal' ], + ], + 'PHP/Pascal' => [ [ 'die' , ], ], # never called + 'MATLAB/Objective C/MUMPS' => [ [ 'die' , ], ], # never called + 'MUMPS' => [ [ 'remove_matches' , '^\s*;' ], ], + 'Octave' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Oracle Forms' => [ [ 'call_regexp_common' , 'C' ], ], + 'Oracle Reports' => [ [ 'call_regexp_common' , 'C' ], ], + 'Pascal' => [ + [ 'call_regexp_common' , 'Pascal' ], + [ 'remove_matches' , '^\s*//' ], + ], + 'Patran Command Language'=> [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_matches' , '^\s*\$#' ], + [ 'call_regexp_common' , 'C' ], + ], + 'Perl' => [ [ 'remove_below' , '^__(END|DATA)__'], + [ 'remove_matches' , '^\s*#' ], + [ 'remove_below_above' , '^=head1', '^=cut' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Python' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'docstring_to_C' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '#.*$' ], + ], + 'PHP' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_matches' , '^\s*//' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '#.*$' ], + [ 'remove_inline' , '//.*$' ], + ], + 'Rexx' => [ [ 'call_regexp_common' , 'C' ], ], + 'Ruby' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Ruby HTML' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + 'Scala' => [ + [ 'remove_matches' , '^\s*//' ], + [ 'remove_inline' , '//.*$' ], + [ 'call_regexp_common' , 'C' ], + ], + 'SKILL' => [ + [ 'call_regexp_common' , 'C' ], + [ 'remove_matches' , '^\s*;' ], + ], + 'SKILL++' => [ + [ 'call_regexp_common' , 'C' ], + [ 'remove_matches' , '^\s*;' ], + ], + 'SQL' => [ + [ 'call_regexp_common' , 'C' ], + [ 
'remove_matches' , '^\s*--' ], + [ 'remove_inline' , '--.*$' ], + ], + 'SQL Stored Procedure'=> [ + [ 'call_regexp_common' , 'C' ], + [ 'remove_matches' , '^\s*--' ], + [ 'remove_inline' , '--.*$' ], + ], + 'SQL Data' => [ + [ 'call_regexp_common' , 'C' ], + [ 'remove_matches' , '^\s*--' ], + [ 'remove_inline' , '--.*$' ], + ], + 'sed' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Smarty' => [ + [ 'smarty_to_C' ], + [ 'call_regexp_common' , 'C' ], + ], + 'Bourne Again Shell' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Bourne Shell' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'm4' => [ [ 'remove_matches' , '^dnl ' ], ], + 'C Shell' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Kermit' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_matches' , '^\s*;' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Korn Shell' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Tcl/Tk' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'Teamcenter def' => [ [ 'remove_matches' , '^\s*#' ], ], + 'Teamcenter met' => [ [ 'call_regexp_common' , 'C' ], ], + 'Teamcenter mth' => [ [ 'remove_matches' , '^\s*#' ], ], + 'Softbridge Basic' => [ [ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ], + [ 'remove_matches' , '^\s*Attribute\s+'], + [ 'remove_matches' , '^\s*\47'], ], # \47 = ' + # http://www.altium.com/files/learningguides/TR0114%20VHDL%20Language%20Reference.pdf + 'VHDL' => [ + [ 'remove_matches' , '^\s*--' ], + [ 'remove_matches' , '^\s*//' ], + [ 'call_regexp_common' , 'C' ], + [ 'remove_inline' , '--.*$' ], + [ 'remove_inline' , '//.*$' ], + ], + 'vim script' => [ + [ 'remove_matches' , '^\s*"' ], + [ 'remove_inline' , '".*$' ], + ], + 'Visual Basic' => [ [ 'remove_above' , '^\s*Attribute\s+VB_Name\s+=' ], + [ 'remove_matches' , '^\s*Attribute\s+'], + [ 'remove_matches' , '^\s*\47'], ], # \47 = ' + 
'yacc' => [ [ 'call_regexp_common' , 'C' ], ], + 'YAML' => [ + [ 'remove_matches' , '^\s*#' ], + [ 'remove_inline' , '#.*$' ], + ], + 'lex' => [ [ 'call_regexp_common' , 'C' ], ], + 'XAML' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + 'MXML' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + 'XML' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + 'XSD' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + 'XSLT' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + 'NAnt scripts' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + 'MSBuild scripts' => [ [ 'remove_html_comments', ], + [ 'call_regexp_common' , 'HTML' ], ], + ); +# 1}}} +%{$rh_EOL_continuation_re} = ( # {{{1 + 'ActionScript' => '\\\\$' , + 'Assembly' => '\\\\$' , + 'ASP' => '\\\\$' , + 'ASP.Net' => '\\\\$' , + 'Ada' => '\\\\$' , + 'awk' => '\\\\$' , + 'bc' => '\\\\$' , + 'C' => '\\\\$' , + 'C++' => '\\\\$' , + 'C/C++ Header' => '\\\\$' , + 'CMake' => '\\\\$' , + 'Cython' => '\\\\$' , + 'C#' => '\\\\$' , + 'D' => '\\\\$' , + 'Dart' => '\\\\$' , + 'Expect' => '\\\\$' , + 'Go' => '\\\\$' , + 'Java' => '\\\\$' , + 'Javascript' => '\\\\$' , + 'Lua' => '\\\\$' , + 'make' => '\\\\$' , + 'MATLAB' => '\.\.\.\s*$' , + 'Objective C' => '\\\\$' , + 'Objective C++' => '\\\\$' , + 'Ocaml' => '\\\\$' , + 'Octave' => '\.\.\.\s*$' , + 'Patran Command Language'=> '\\\\$' , + 'Python' => '\\\\$' , + 'Ruby' => '\\\\$' , + 'sed' => '\\\\$' , + 'Bourne Again Shell' => '\\\\$' , + 'Bourne Shell' => '\\\\$' , + 'C Shell' => '\\\\$' , + 'Kermit' => '\\\\$' , + 'Korn Shell' => '\\\\$' , + 'Tcl/Tk' => '\\\\$' , + 'lex' => '\\\\$' , + ); +# 1}}} +%{$rh_Not_Code_Extension} = ( # {{{1 + '1' => 1, # Man pages (documentation): + '2' => 1, + '3' => 1, + '4' => 1, + '5' => 1, + '6' => 1, + '7' => 1, + '8' => 1, + '9' => 1, + 'a' => 1, # Static object code. 
+ 'ad' => 1, # X application default resource file. + 'afm' => 1, # font metrics + 'arc' => 1, # arc(1) archive + 'arj' => 1, # arj(1) archive + 'au' => 1, # Audio sound filearj(1) archive + 'bak' => 1, # Backup files - we only want to count the "real" files. + 'bdf' => 1, + 'bmp' => 1, + 'bz2' => 1, # bzip2(1) compressed file + 'csv' => 1, # comma separated values + 'desktop' => 1, + 'dic' => 1, + 'doc' => 1, + 'elc' => 1, + 'eps' => 1, + 'fig' => 1, + 'gif' => 1, + 'gz' => 1, + 'hdf' => 1, # hierarchical data format + 'in' => 1, # Debatable. + 'jpg' => 1, + 'kdelnk' => 1, + 'man' => 1, + 'mf' => 1, + 'mp3' => 1, + 'n' => 1, + 'o' => 1, # Object code is generated from source code. + 'pbm' => 1, + 'pdf' => 1, + 'pfb' => 1, + 'png' => 1, + 'po' => 1, + 'ps' => 1, # Postscript is _USUALLY_ generated automatically. + 'sgm' => 1, + 'sgml' => 1, + 'so' => 1, # Dynamically-loaded object code. + 'Tag' => 1, + 'tex' => 1, + 'text' => 1, + 'tfm' => 1, + 'tgz' => 1, # gzipped tarball + 'tiff' => 1, + 'txt' => 1, + 'vf' => 1, + 'wav' => 1, + 'xbm' => 1, + 'xpm' => 1, + 'Y' => 1, # file compressed with "Yabba" + 'Z' => 1, # file compressed with "compress" + 'zip' => 1, # zip archive +); # 1}}} +%{$rh_Not_Code_Filename} = ( # {{{1 + 'AUTHORS' => 1, + 'BUGS' => 1, + 'BUGS' => 1, + 'Changelog' => 1, + 'ChangeLog' => 1, + 'ChangeLog' => 1, + 'Changes' => 1, + 'CHANGES' => 1, + 'COPYING' => 1, + 'COPYING' => 1, + '.cvsignore' => 1, + 'Entries' => 1, + 'FAQ' => 1, + 'iconfig.h' => 1, # Skip "iconfig.h" files; they're used in Imakefiles. + 'INSTALL' => 1, + 'MAINTAINERS' => 1, + 'MD5SUMS' => 1, + 'NEWS' => 1, + 'readme' => 1, + 'Readme' => 1, + 'README' => 1, + 'README.tk' => 1, # used in kdemultimedia, it's confusing. 
+ 'Repository' => 1, + 'Root' => 1, # CVS + 'TODO' => 1, +); +# 1}}} +%{$rh_Scale_Factor} = ( # {{{1 + '1032/af' => 5.00, + '1st generation default' => 0.25, + '2nd generation default' => 0.75, + '3rd generation default' => 1.00, + '4th generation default' => 4.00, + '5th generation default' => 16.00, + 'aas macro' => 0.88, + 'abap/4' => 5.00, + 'ABAP' => 5.00, + 'accel' => 4.21, + 'access' => 2.11, + 'ActionScript' => 1.36, + 'actor' => 3.81, + 'acumen' => 2.86, + 'Ada' => 0.52, + 'Ada 83' => 1.13, + 'Ada 95' => 1.63, + 'adr/dl' => 2.00, + 'adr/ideal/pdl' => 4.00, + 'ads/batch' => 4.00, + 'ads/online' => 4.00, + 'ADSO/IDSM' => 3.00, + 'advantage' => 2.11, + 'ai shell default' => 1.63, + 'ai shells' => 1.63, + 'algol 68' => 0.75, + 'algol w' => 0.75, + 'ambush' => 2.50, + 'aml' => 1.63, + 'AMPLE' => 2.00, + 'amppl ii' => 1.25, + 'ansi basic' => 1.25, + 'ansi cobol 74' => 0.75, + 'ansi cobol 85' => 0.88, + 'SQL' => 6.15, + 'SQL Stored Procedure' => 6.15, + 'SQL Data' => 1.00, + 'answer/db' => 6.15, + 'apl 360/370' => 2.50, + 'apl default' => 2.50, + 'apl*plus' => 2.50, + 'applesoft basic' => 0.63, + 'application builder' => 4.00, + 'application manager' => 2.22, + 'aps' => 0.96, + 'aps' => 4.71, + 'apt' => 1.13, + 'aptools' => 4.00, + 'arc' => 1.63, + 'ariel' => 0.75, + 'arity' => 1.63, + 'arity prolog' => 1.25, + 'art' => 1.63, + 'art enterprise' => 1.74, + 'artemis' => 2.00, + 'artim' => 1.74, + 'as/set' => 4.21, + 'asi/inquiry' => 6.15, + 'ask windows' => 1.74, +'asa' => 1.29, +'ASP' => 1.29, +'ASP.Net' => 1.29, +'aspx' => 1.29, +#'resx' => 1.29, +'asax' => 1.29, +'ascx' => 1.29, +'asmx' => 1.29, +'config' => 1.29, +'webinfo' => 1.29, +'CCS' => 5.33, + +# 'assembler (basic)' => 0.25, + 'Assembly' => 0.25, + + 'Assembly (macro)' => 0.51, + 'associative default' => 1.25, + 'autocoder' => 0.25, + 'awk' => 3.81, + 'aztec c' => 0.63, + 'balm' => 0.75, + 'base sas' => 1.51, + 'basic' => 0.75, + 'basic a' => 0.63, +# 'basic assembly' => 0.25, + 'bc' => 1.50, + 'berkeley 
pascal' => 0.88, + 'better basic' => 0.88, + 'bliss' => 0.75, + 'bmsgen' => 2.22, + 'boeingcalc' => 13.33, + 'bteq' => 6.15, + + 'C' => 0.77, + + 'c set 2' => 0.88, + + 'C#' => 1.36, + + 'C++' => 1.51, + + 'c86plus' => 0.63, + 'cadbfast' => 2.00, + 'caearl' => 2.86, + 'cast' => 1.63, + 'cbasic' => 0.88, + 'cdadl' => 4.00, + 'cellsim' => 1.74, +'ColdFusion' => 4.00, + 'chili' => 0.75, + 'chill' => 0.75, + 'cics' => 1.74, + 'clarion' => 1.38, + 'clascal' => 1.00, + 'cli' => 2.50, + 'clipper' => 2.05, + 'clipper db' => 2.00, + 'clos' => 3.81, + 'clout' => 2.00, + 'CMake' => 1.00, + 'cms2' => 0.75, + 'cmsgen' => 4.21, + 'COBOL' => 1.04, + 'COBOL ii' => 0.75, + 'COBOL/400' => 0.88, + 'cobra' => 4.00, + 'codecenter' => 2.22, + 'cofac' => 2.22, + 'cogen' => 2.22, + 'cognos' => 2.22, + 'cogo' => 1.13, + 'comal' => 1.00, + 'comit ii' => 1.25, + 'common lisp' => 1.25, + 'concurrent pascal' => 1.00, + 'conniver' => 1.25, + 'cool:gen/ief' => 2.58, + 'coral 66' => 0.75, + 'corvet' => 4.21, + 'corvision' => 5.33, + 'cpl' => 0.50, + 'Crystal Reports' => 4.00, + 'csl' => 1.63, + 'csp' => 1.51, + 'cssl' => 1.74, + +'CSS' => 1.0, + + 'culprit' => 1.57, + 'cxpert' => 1.63, + 'cygnet' => 4.21, + 'D' => 1.70, + 'DAL' => 1.50, + 'Dart' => 2.00, + 'data base default' => 2.00, + 'dataflex' => 2.00, + 'datatrieve' => 4.00, + 'dbase iii' => 2.00, + 'dbase iv' => 1.54, + 'dcl' => 0.38, + 'decision support default' => 2.22, + 'decrally' => 2.00, + 'delphi' => 2.76, + 'dl/1' => 2.00, + 'NASTRAN DMAP' => 2.35, + 'dna4' => 4.21, + 'DOS Batch' => 0.63, + 'dsp assembly' => 0.50, + 'dtabl' => 1.74, + 'dtipt' => 1.74, + 'dyana' => 1.13, + 'dynamoiii' => 1.74, + 'easel' => 2.76, + 'easy' => 1.63, + 'easytrieve+' => 2.35, + 'eclipse' => 1.63, + 'eda/sql' => 6.67, + 'edscheme 3.4' => 1.51, + 'eiffel' => 3.81, + 'enform' => 1.74, + 'englishbased default' => 1.51, + 'ensemble' => 2.76, + 'epos' => 4.00, + 'Erlang' => 2.11, + 'esf' => 2.00, + 'espadvisor' => 1.63, + 'espl/i' => 1.13, + 'euclid' => 0.75, + 
'excel' => 1.74, + 'excel 12' => 13.33, + 'excel 34' => 13.33, + 'excel 5' => 13.33, + 'express' => 2.22, + 'exsys' => 1.63, + 'extended common lisp' => 1.43, + 'eznomad' => 2.22, + 'facets' => 4.00, + 'factorylink iv' => 2.76, + 'fame' => 2.22, + 'filemaker pro' => 2.22, + 'flavors' => 2.76, + 'flex' => 1.74, + 'flexgen' => 2.76, + 'Focus' => 1.90, + 'foil' => 1.51, + 'forte' => 4.44, + 'forth' => 1.25, + 'Fortran 66' => 0.63, + 'Fortran 77' => 0.75, + 'Fortran 90' => 1.00, + 'Fortran 95' => 1.13, + 'Fortran II' => 0.63, + 'foundation' => 2.76, + 'foxpro' => 2.29, + 'foxpro 1' => 2.00, + 'foxpro 2.5' => 2.35, + 'framework' => 13.33, + 'g2' => 1.63, + 'gamma' => 5.00, + 'genascript' => 2.96, + 'gener/ol' => 6.15, + 'genexus' => 5.33, + 'genifer' => 4.21, + 'geode 2.0' => 5.00, + 'gfa basic' => 2.35, + 'gml' => 1.74, + 'golden common lisp' => 1.25, + 'gpss' => 1.74, + 'guest' => 2.86, + 'guru' => 1.63, + 'Go' => 2.50, + 'Groovy' => 4.10, + 'gw basic' => 0.82, + 'Haskell' => 2.11, + 'high c' => 0.63, + 'hlevel' => 1.38, + 'hp basic' => 0.63, + +'HTML' => 1.90 , +'XML' => 1.90 , +'MXML' => 1.90 , +'XSLT' => 1.90 , +'DTD' => 1.90 , +'XSD' => 1.90 , +'NAnt scripts' => 1.90 , +'MSBuild scripts' => 1.90 , + + 'HTML 2' => 5.00, + 'HTML 3' => 5.33, + 'huron' => 5.00, + 'ibm adf i' => 4.00, + 'ibm adf ii' => 4.44, + 'ibm advanced basic' => 0.82, + 'ibm cics/vs' => 2.00, + 'ibm compiled basic' => 0.88, + 'ibm vs cobol' => 0.75, + 'ibm vs cobol ii' => 0.88, + 'ices' => 1.13, + 'icon' => 1.00, + 'ideal' => 1.54, + 'idms' => 2.00, + 'ief' => 5.71, + 'ief/cool:gen' => 2.58, + 'iew' => 5.71, + 'ifps/plus' => 2.50, + 'imprs' => 2.00, + 'informix' => 2.58, + 'ingres' => 2.00, + 'inquire' => 6.15, + 'insight2' => 1.63, + 'install/1' => 5.00, + 'intellect' => 1.51, + 'interlisp' => 1.38, + 'interpreted basic' => 0.75, + 'interpreted c' => 0.63, + 'iqlisp' => 1.38, + 'iqrp' => 6.15, + 'j2ee' => 1.60, + 'janus' => 1.13, + 'Java' => 1.36, +'Javascript' => 1.48, +'JSP' => 1.48, + 'JCL' => 
1.67, + 'joss' => 0.75, + 'jovial' => 0.75, + 'jsp' => 1.36, + 'kappa' => 2.00, + 'kbms' => 1.63, + 'kcl' => 1.25, + 'kee' => 1.63, + 'keyplus' => 2.00, + 'kl' => 1.25, + 'klo' => 1.25, + 'knowol' => 1.63, + 'krl' => 1.38, + 'Kermit' => 2.00, + 'Korn Shell' => 3.81, + 'ladder logic' => 2.22, + 'lambit/l' => 1.25, + 'lattice c' => 0.63, + 'liana' => 0.63, + 'lilith' => 1.13, + 'linc ii' => 5.71, + 'Lisp' => 1.25, + 'LiveLink OScript' => 3.5 , + 'loglisp' => 1.38, + 'loops' => 3.81, + 'lotus 123 dos' => 13.33, + 'lotus macros' => 0.75, + 'lotus notes' => 3.64, + 'lucid 3d' => 13.33, + 'lyric' => 1.51, + 'm4' => 1.00, + 'm' => 5.00, + 'macforth' => 1.25, + 'mach1' => 2.00, + 'machine language' => 0.13, + 'maestro' => 5.00, + 'magec' => 5.00, + 'magik' => 3.81, + 'Lake' => 3.81, + 'make' => 2.50, + 'mantis' => 2.96, + 'mapper' => 0.99, + 'mark iv' => 2.00, + 'mark v' => 2.22, + 'mathcad' => 16.00, + 'mdl' => 2.22, + 'mentor' => 1.51, + 'mesa' => 0.75, + 'microfocus cobol' => 1.00, + 'microforth' => 1.25, + 'microsoft c' => 0.63, + 'microstep' => 4.00, + 'miranda' => 2.00, + 'model 204' => 2.11, + 'modula 2' => 1.00, + 'mosaic' => 13.33, + # 'ms c ++ v. 
7' => 1.51, + 'ms compiled basic' => 0.88, + 'msl' => 1.25, + 'mulisp' => 1.25, + 'MUMPS' => 4.21, + 'Nastran' => 1.13, + 'natural' => 1.54, + 'natural 1' => 1.51, + 'natural 2' => 1.74, + 'natural construct' => 3.20, + 'natural language' => 0.03, + 'netron/cap' => 4.21, + 'nexpert' => 1.63, + 'nial' => 1.63, + 'nomad2' => 2.00, + 'nonprocedural default' => 2.22, + 'notes vip' => 2.22, + 'nroff' => 1.51, + 'object assembler' => 1.25, + 'object lisp' => 2.76, + 'object logo' => 2.76, + 'object pascal' => 2.76, + 'object star' => 5.00, + 'Objective C' => 2.96, + 'Objective C++' => 2.96, + 'objectoriented default' => 2.76, + 'objectview' => 3.20, + 'Ocaml' => 3.00, + 'ogl' => 1.00, + 'omnis 7' => 2.00, + 'oodl' => 2.76, + 'ops' => 1.74, + 'ops5' => 1.38, + 'oracle' => 2.76, + 'Oracle Reports' => 2.76, + 'Oracle Forms' => 2.67, + 'Oracle Developer/2000' => 3.48, + 'oscar' => 0.75, + 'pacbase' => 1.67, + 'pace' => 2.00, + 'paradox/pal' => 2.22, + 'Pascal' => 0.88, + 'Patran Command Language' => 2.50, + 'pc focus' => 2.22, + 'pdl millenium' => 3.81, + 'pdp11 ade' => 1.51, + 'peoplesoft' => 2.50, + 'Perl' => 4.00, + 'persistance object builder' => 3.81, + 'pilot' => 1.51, + 'pl/1' => 1.38, + 'pl/m' => 1.13, + 'pl/s' => 0.88, + 'pl/sql' => 2.58, + 'planit' => 1.51, + 'planner' => 1.25, + 'planperfect 1' => 11.43, + 'plato' => 1.51, + 'polyforth' => 1.25, + 'pop' => 1.38, + 'poplog' => 1.38, + 'power basic' => 1.63, + 'powerbuilder' => 3.33, + 'powerhouse' => 5.71, + 'ppl (plus)' => 2.00, + 'problemoriented default' => 1.13, + 'proc' => 2.96, + 'procedural default' => 0.75, + 'professional pascal' => 0.88, + 'program generator default' => 5.00, + 'progress v4' => 2.22, + 'proiv' => 1.38, + 'prolog' => 1.25, + 'prose' => 0.75, + 'proteus' => 0.75, + 'qbasic' => 1.38, + 'qbe' => 6.15, + 'qmf' => 5.33, + 'qnial' => 1.63, + 'quattro' => 13.33, + 'quattro pro' => 13.33, + 'query default' => 6.15, + 'quick basic 1' => 1.25, + 'quick basic 2' => 1.31, + 'quick basic 3' => 1.38, + 
'quick c' => 0.63, + 'quickbuild' => 2.86, + 'quiz' => 5.33, + 'rally' => 2.00, + 'ramis ii' => 2.00, + 'rapidgen' => 2.86, + 'ratfor' => 0.88, + 'rdb' => 2.00, + 'realia' => 1.74, + 'realizer 1.0' => 2.00, + 'realizer 2.0' => 2.22, + 'relate/3000' => 2.00, + 'reuse default' => 16.00, + 'Rexx' => 1.19, + 'Rexx (mvs)' => 1.00, + 'Rexx (os/2)' => 1.74, + 'rm basic' => 0.88, + 'rm cobol' => 0.75, + 'rm fortran' => 0.75, + 'rpg i' => 1.00, + 'rpg ii' => 1.63, + 'rpg iii' => 1.63, + 'rtexpert 1.4' => 1.38, + 'sabretalk' => 0.90, + 'sail' => 0.75, + 'sapiens' => 5.00, + 'sas' => 1.95, + 'savvy' => 6.15, + 'sbasic' => 0.88, + 'Scala' => 4.10, + 'sceptre' => 1.13, + 'scheme' => 1.51, + 'screen painter default' => 13.33, + 'sequal' => 6.67, + 'Bourne Shell' => 3.81, + 'Bourne Again Shell' => 3.81, + 'ksh' => 3.81, + 'C Shell' => 3.81, + 'siebel tools ' => 6.15, + 'simplan' => 2.22, + 'simscript' => 1.74, + 'simula' => 1.74, + 'simula 67' => 1.74, + 'simulation default' => 1.74, + 'SKILL' => 2.00, + 'SKILL++' => 2.00, + 'slogan' => 0.98, + 'smalltalk' => 2.50, + 'smalltalk 286' => 3.81, + 'smalltalk 80' => 3.81, + 'smalltalk/v' => 3.81, + 'Smarty' => 3.50, + 'snap' => 1.00, + 'snobol24' => 0.63, + 'softscreen' => 5.71, + 'Softbridge Basic' => 2.76, + 'solo' => 1.38, + 'speakeasy' => 2.22, + 'spinnaker ppl' => 2.22, + 'splus' => 2.50, + 'spreadsheet default' => 13.33, + 'sps' => 0.25, + 'spss' => 2.50, + 'SQL' => 2.29, + 'sqlwindows' => 6.67, + 'statistical default' => 2.50, + 'strategem' => 2.22, + 'stress' => 1.13, + 'strongly typed default' => 0.88, + 'style' => 1.74, + 'superbase 1.3' => 2.22, + 'surpass' => 13.33, + 'sybase' => 2.00, + 'symantec c++' => 2.76, + 'symbolang' => 1.25, + 'synchroworks' => 4.44, + 'synon/2e' => 4.21, + 'systemw' => 2.22, + 'tandem access language' => 0.88, + 'Tcl/Tk' => 4.00, + 'Teamcenter def' => 1.00, + 'Teamcenter met' => 1.00, + 'Teamcenter mth' => 1.00, + 'telon' => 5.00, + 'tessaract' => 2.00, + 'the twin' => 13.33, + 'themis' => 6.15, 
+ 'tiief' => 5.71, + 'topspeed c++' => 2.76, + 'transform' => 5.33, + 'translisp plus' => 1.43, + 'treet' => 1.25, + 'treetran' => 1.25, + 'trs80 basic' => 0.63, + 'true basic' => 1.25, + 'turbo c' => 0.63, + # 'turbo c++' => 1.51, + 'turbo expert' => 1.63, + 'turbo pascal >5' => 1.63, + 'turbo pascal 14' => 1.00, + 'turbo pascal 45' => 1.13, + 'turbo prolog' => 1.00, + 'turing' => 1.00, + 'tutor' => 1.51, + 'twaice' => 1.63, + 'ucsd pascal' => 0.88, + 'ufo/ims' => 2.22, + 'uhelp' => 2.50, + 'uniface' => 5.00, + # 'unix shell scripts' => 3.81, + 'vax acms' => 1.38, + 'vax ade' => 2.00, + 'vbscript' => 2.35, + 'vectran' => 0.75, + 'VHDL' => 4.21, + 'vim script' => 3.00, + 'visible c' => 1.63, + 'visible cobol' => 2.00, + 'visicalc 1' => 8.89, + 'visual 4.0' => 2.76, + 'visual basic' => 1.90, + 'visual basic 1' => 1.74, + 'visual basic 2' => 1.86, + 'visual basic 3' => 2.00, + 'visual basic 4' => 2.22, + 'visual basic 5' => 2.76, + 'Visual Basic' => 2.76, + 'visual basic dos' => 2.00, + 'visual c++' => 2.35, + 'visual cobol' => 4.00, + 'visual objects' => 5.00, + 'visualage' => 3.81, + 'visualgen' => 4.44, + 'vpf' => 0.84, + 'vsrexx' => 2.50, + 'vulcan' => 1.25, + 'vz programmer' => 2.22, + 'warp x' => 2.00, + 'watcom c' => 0.63, + 'watcom c/386' => 0.63, + 'waterloo c' => 0.63, + 'waterloo pascal' => 0.88, + 'watfiv' => 0.94, + 'watfor' => 0.88, + 'web scripts' => 5.33, + 'whip' => 0.88, + 'wizard' => 2.86, + 'xlisp' => 1.25, + 'XAML' => 1.90, + 'yacc' => 1.51, + 'yacc++' => 1.51, + 'YAML' => 0.90, + 'zbasic' => 0.88, + 'zim' => 4.21, + 'zlisp' => 1.25, + +'Expect' => 2.00, +'C/C++ Header' => 1.00, +'inc' => 1.00, +'lex' => 1.00, +'MATLAB' => 4.00, +'IDL' => 3.80, +'Octave' => 4.00, +'ML' => 3.00, +'Modula3' => 2.00, +'PHP' => 3.50, +'Python' => 4.20, +'Cython' => 3.80, +'Ruby' => 4.20, +'Ruby HTML' => 4.00, +'sed' => 4.00, +'Lua' => 4.00, +); +# 1}}} +%{$rh_Known_Binary_Archives} = ( # {{{1 + '.tar' => 1 , + '.tar.Z' => 1 , + '.tar.gz' => 1 , + '.tar.bz2' => 1 , + 
'.zip' => 1 , + '.Zip' => 1 , + '.ZIP' => 1 , + '.ear' => 1 , # Java + '.war' => 1 , # contained within .ear + ); +# 1}}} +} # end sub set_constants() +sub Install_Regexp_Common { # {{{1 + # Installs portions of Damian Conway's & Abigail's Regexp::Common + # module, v2.120, into a temporary directory for the duration of + # this run. + + my %Regexp_Common_Contents = (); +$Regexp_Common_Contents{'Common'} = <<'EOCommon'; # {{{2 +package Regexp::Common; + +use 5.00473; +use strict; + +local $^W = 1; + +use vars qw /$VERSION %RE %sub_interface $AUTOLOAD/; + +($VERSION) = q $Revision: 2.120 $ =~ /([\d.]+)/; + + +sub _croak { + require Carp; + goto &Carp::croak; +} + +sub _carp { + require Carp; + goto &Carp::carp; +} + +sub new { + my ($class, @data) = @_; + my %self; + tie %self, $class, @data; + return \%self; +} + +sub TIEHASH { + my ($class, @data) = @_; + bless \@data, $class; +} + +sub FETCH { + my ($self, $extra) = @_; + return bless ref($self)->new(@$self, $extra), ref($self); +} + +# Modification for cloc: only need a few modules from Regexp::Common. +my %imports = map {$_ => "Regexp::Common::$_"} + qw /balanced comment delimited /; +#my %imports = map {$_ => "Regexp::Common::$_"} +# qw /balanced CC comment delimited lingua list +# net number profanity SEN URI whitespace +# zip/; + +sub import { + shift; # Shift off the class. + tie %RE, __PACKAGE__; + { + no strict 'refs'; + *{caller() . "::RE"} = \%RE; + } + + my $saw_import; + my $no_defaults; + my %exclude; + foreach my $entry (grep {!/^RE_/} @_) { + if ($entry eq 'pattern') { + no strict 'refs'; + *{caller() . "::pattern"} = \&pattern; + next; + } + # This used to prevent $; from being set. We still recognize it, + # but we won't do anything. 
+ if ($entry eq 'clean') { + next; + } + if ($entry eq 'no_defaults') { + $no_defaults ++; + next; + } + if (my $module = $imports {$entry}) { + $saw_import ++; + eval "require $module;"; + die $@ if $@; + next; + } + if ($entry =~ /^!(.*)/ && $imports {$1}) { + $exclude {$1} ++; + next; + } + # As a last resort, try to load the argument. + my $module = $entry =~ /^Regexp::Common/ + ? $entry + : "Regexp::Common::" . $entry; + eval "require $module;"; + die $@ if $@; + } + + unless ($saw_import || $no_defaults) { + foreach my $module (values %imports) { + next if $exclude {$module}; + eval "require $module;"; + die $@ if $@; + } + } + + my %exported; + foreach my $entry (grep {/^RE_/} @_) { + if ($entry =~ /^RE_(\w+_)?ALL$/) { + my $m = defined $1 ? $1 : ""; + my $re = qr /^RE_${m}.*$/; + while (my ($sub, $interface) = each %sub_interface) { + next if $exported {$sub}; + next unless $sub =~ /$re/; + { + no strict 'refs'; + *{caller() . "::$sub"} = $interface; + } + $exported {$sub} ++; + } + } + else { + next if $exported {$entry}; + _croak "Can't export unknown subroutine &$entry" + unless $sub_interface {$entry}; + { + no strict 'refs'; + *{caller() . "::$entry"} = $sub_interface {$entry}; + } + $exported {$entry} ++; + } + } +} + +sub AUTOLOAD { _croak "Can't $AUTOLOAD" } + +sub DESTROY {} + +my %cache; + +my $fpat = qr/^(-\w+)/; + +sub _decache { + my @args = @{tied %{$_[0]}}; + my @nonflags = grep {!/$fpat/} @args; + my $cache = get_cache(@nonflags); + _croak "Can't create unknown regex: \$RE{" + . join("}{",@args) . "}" + unless exists $cache->{__VAL__}; + _croak "Perl $] does not support the pattern " + . "\$RE{" . join("}{",@args) + . "}.\nYou need Perl $cache->{__VAL__}{version} or later" + unless ($cache->{__VAL__}{version}||0) <= $]; + my %flags = ( %{$cache->{__VAL__}{default}}, + map { /$fpat\Q$;\E(.*)/ ? ($1 => $2) + : /$fpat/ ? 
($1 => undef) + : () + } @args); + $cache->{__VAL__}->_clone_with(\@args, \%flags); +} + +use overload q{""} => \&_decache; + + +sub get_cache { + my $cache = \%cache; + foreach (@_) { + $cache = $cache->{$_} + || ($cache->{$_} = {}); + } + return $cache; +} + +sub croak_version { + my ($entry, @args) = @_; +} + +sub pattern { + my %spec = @_; + _croak 'pattern() requires argument: name => [ @list ]' + unless $spec{name} && ref $spec{name} eq 'ARRAY'; + _croak 'pattern() requires argument: create => $sub_ref_or_string' + unless $spec{create}; + + if (ref $spec{create} ne "CODE") { + my $fixed_str = "$spec{create}"; + $spec{create} = sub { $fixed_str } + } + + my @nonflags; + my %default; + foreach ( @{$spec{name}} ) { + if (/$fpat=(.*)/) { + $default{$1} = $2; + } + elsif (/$fpat\s*$/) { + $default{$1} = undef; + } + else { + push @nonflags, $_; + } + } + + my $entry = get_cache(@nonflags); + + if ($entry->{__VAL__}) { + _carp "Overriding \$RE{" + . join("}{",@nonflags) + . "}"; + } + + $entry->{__VAL__} = bless { + create => $spec{create}, + match => $spec{match} || \&generic_match, + subs => $spec{subs} || \&generic_subs, + version => $spec{version}, + default => \%default, + }, 'Regexp::Common::Entry'; + + foreach (@nonflags) {s/\W/X/g} + my $subname = "RE_" . join ("_", @nonflags); + $sub_interface{$subname} = sub { + push @_ => undef if @_ % 2; + my %flags = @_; + my $pat = $spec{create}->($entry->{__VAL__}, + {%default, %flags}, \@nonflags); + if (exists $flags{-keep}) { $pat =~ s/\Q(?k:/(/g; } + else { $pat =~ s/\Q(?k:/(?:/g; } + return exists $flags {-i} ? 
qr /(?i:$pat)/ : qr/$pat/; + }; + + return 1; +} + +sub generic_match {$_ [1] =~ /$_[0]/} +sub generic_subs {$_ [1] =~ s/$_[0]/$_[2]/} + +sub matches { + my ($self, $str) = @_; + my $entry = $self -> _decache; + $entry -> {match} -> ($entry, $str); +} + +sub subs { + my ($self, $str, $newstr) = @_; + my $entry = $self -> _decache; + $entry -> {subs} -> ($entry, $str, $newstr); + return $str; +} + + +package Regexp::Common::Entry; +# use Carp; + +local $^W = 1; + +use overload + q{""} => sub { + my ($self) = @_; + my $pat = $self->{create}->($self, $self->{flags}, $self->{args}); + if (exists $self->{flags}{-keep}) { + $pat =~ s/\Q(?k:/(/g; + } + else { + $pat =~ s/\Q(?k:/(?:/g; + } + if (exists $self->{flags}{-i}) { $pat = "(?i)$pat" } + return $pat; + }; + +sub _clone_with { + my ($self, $args, $flags) = @_; + bless { %$self, args=>$args, flags=>$flags }, ref $self; +} +# +# Copyright (c) 2001 - 2005, Damian Conway and Abigail. All Rights +# Reserved. This module is free software. It may be used, redistributed +# and/or modified under the terms of the Perl Artistic License +# (see http://www.perl.com/perl/misc/Artistic.html) +EOCommon +# 2}}} +$Regexp_Common_Contents{'Common/comment'} = <<'EOC'; # {{{2 +# $Id: comment.pm,v 2.116 2005/03/16 00:00:02 abigail Exp $ + +package Regexp::Common::comment; + +use strict; +local $^W = 1; + +use Regexp::Common qw /pattern clean no_defaults/; +use vars qw /$VERSION/; + +($VERSION) = q $Revision: 2.116 $ =~ /[\d.]+/g; + +my @generic = ( + {languages => [qw /ABC Forth/], + to_eol => ['\\\\']}, # This is for just a *single* backslash. 
+ + {languages => [qw /Ada Alan Eiffel lua/], + to_eol => ['--']}, + + {languages => [qw /Advisor/], + to_eol => ['#|//']}, + + {languages => [qw /Advsys CQL Lisp LOGO M MUMPS REBOL Scheme + SMITH zonefile/], + to_eol => [';']}, + + {languages => ['Algol 60'], + from_to => [[qw /comment ;/]]}, + + {languages => [qw {ALPACA B C C-- LPC PL/I}], + from_to => [[qw {/* */}]]}, + + {languages => [qw /awk fvwm2 Icon mutt Perl Python QML R Ruby shell Tcl/], + to_eol => ['#']}, + + {languages => [[BASIC => 'mvEnterprise']], + to_eol => ['[*!]|REM']}, + + {languages => [qw /Befunge-98 Funge-98 Shelta/], + id => [';']}, + + {languages => ['beta-Juliet', 'Crystal Report', 'Portia'], + to_eol => ['//']}, + + {languages => ['BML'], + from_to => [['']], + }, + + {languages => [qw /C++/, 'C#', qw /Cg ECMAScript FPL Java JavaScript/], + to_eol => ['//'], + from_to => [[qw {/* */}]]}, + + {languages => [qw /CLU LaTeX slrn TeX/], + to_eol => ['%']}, + + {languages => [qw /False/], + from_to => [[qw !{ }!]]}, + + {languages => [qw /Fortran/], + to_eol => ['!']}, + + {languages => [qw /Haifu/], + id => [',']}, + + {languages => [qw /ILLGOL/], + to_eol => ['NB']}, + + {languages => [qw /INTERCAL/], + to_eol => [q{(?:(?:PLEASE(?:\s+DO)?|DO)\s+)?(?:NOT|N'T)}]}, + + {languages => [qw /J/], + to_eol => ['NB[.]']}, + + {languages => [qw /Nickle/], + to_eol => ['#'], + from_to => [[qw {/* */}]]}, + + {languages => [qw /Oberon/], + from_to => [[qw /(* *)/]]}, + + {languages => [[qw /Pascal Delphi/], [qw /Pascal Free/], [qw /Pascal GPC/]], + to_eol => ['//'], + from_to => [[qw !{ }!], [qw !(* *)!]]}, + + {languages => [[qw /Pascal Workshop/]], + id => [qw /"/], + from_to => [[qw !{ }!], [qw !(* *)!], [qw !/* */!]]}, + + {languages => [qw /PEARL/], + to_eol => ['!'], + from_to => [[qw {/* */}]]}, + + {languages => [qw /PHP/], + to_eol => ['#', '//'], + from_to => [[qw {/* */}]]}, + + {languages => [qw !PL/B!], + to_eol => ['[.;]']}, + + {languages => [qw !PL/SQL!], + to_eol => ['--'], + from_to 
=> [[qw {/* */}]]}, + + {languages => [qw /Q-BAL/], + to_eol => ['`']}, + + {languages => [qw /Smalltalk/], + id => ['"']}, + + {languages => [qw /SQL/], + to_eol => ['-{2,}']}, + + {languages => [qw /troff/], + to_eol => ['\\\"']}, + + {languages => [qw /vi/], + to_eol => ['"']}, + + {languages => [qw /*W/], + from_to => [[qw {|| !!}]]}, +); + +my @plain_or_nested = ( + [Caml => undef, "(*" => "*)"], + [Dylan => "//", "/*" => "*/"], + [Haskell => "-{2,}", "{-" => "-}"], + [Hugo => "!(?!\\\\)", "!\\" => "\\!"], + [SLIDE => "#", "(*" => "*)"], +); + +# +# Helper subs. +# + +sub combine { + local $_ = join "|", @_; + if (@_ > 1) { + s/\(\?k:/(?:/g; + $_ = "(?k:$_)"; + } + $_ +} + +sub to_eol ($) {"(?k:(?k:$_[0])(?k:[^\\n]*)(?k:\\n))"} +sub id ($) {"(?k:(?k:$_[0])(?k:[^$_[0]]*)(?k:$_[0]))"} # One char only! +sub from_to { + local $^W = 1; + my ($begin, $end) = @_; + + my $qb = quotemeta $begin; + my $qe = quotemeta $end; + my $fe = quotemeta substr $end => 0, 1; + my $te = quotemeta substr $end => 1; + + "(?k:(?k:$qb)(?k:(?:[^$fe]+|$fe(?!$te))*)(?k:$qe))"; +} + + +my $count = 0; +sub nested { + local $^W = 1; + my ($begin, $end) = @_; + + $count ++; + my $r = '(??{$Regexp::Common::comment ['. $count . ']})'; + + my $qb = quotemeta $begin; + my $qe = quotemeta $end; + my $fb = quotemeta substr $begin => 0, 1; + my $fe = quotemeta substr $end => 0, 1; + + my $tb = quotemeta substr $begin => 1; + my $te = quotemeta substr $end => 1; + + use re 'eval'; + + my $re; + if ($fb eq $fe) { + $re = qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/; + } + else { + local $" = "|"; + my @clauses = "(?>[^$fb$fe]+)"; + push @clauses => "$fb(?!$tb)" if length $tb; + push @clauses => "$fe(?!$te)" if length $te; + push @clauses => $r; + $re = qr /(?:$qb(?:@clauses)*$qe)/; + } + + $Regexp::Common::comment [$count] = qr/$re/; +} + +# +# Process data. 
+# + +foreach my $info (@plain_or_nested) { + my ($language, $mark, $begin, $end) = @$info; + pattern name => [comment => $language], + create => + sub {my $re = nested $begin => $end; + my $prefix = defined $mark ? $mark . "[^\n]*\n|" : ""; + exists $_ [1] -> {-keep} ? qr /($prefix$re)/ + : qr /$prefix$re/ + }, + version => 5.006, + ; +} + + +foreach my $group (@generic) { + my $pattern = combine +(map {to_eol $_} @{$group -> {to_eol}}), + (map {from_to @$_} @{$group -> {from_to}}), + (map {id $_} @{$group -> {id}}), + ; + foreach my $language (@{$group -> {languages}}) { + pattern name => [comment => ref $language ? @$language : $language], + create => $pattern, + ; + } +} + + + +# +# Other languages. +# + +# http://www.pascal-central.com/docs/iso10206.txt +pattern name => [qw /comment Pascal/], + create => '(?k:' . '(?k:[{]|[(][*])' + . '(?k:[^}*]*(?:[*][^)][^}*]*)*)' + . '(?k:[}]|[*][)])' + . ')' + ; + +# http://www.templetons.com/brad/alice/language/ +pattern name => [qw /comment Pascal Alice/], + create => '(?k:(?k:[{])(?k:[^}\n]*)(?k:[}]))' + ; + + +# http://westein.arb-phys.uni-dortmund.de/~wb/a68s.txt +pattern name => [qw (comment), 'Algol 68'], + create => q {(?k:(?:#[^#]*#)|} . + q {(?:\bco\b(?:[^c]+|\Bc|\bc(?!o\b))*\bco\b)|} . + q {(?:\bcomment\b(?:[^c]+|\Bc|\bc(?!omment\b))*\bcomment\b))} + ; + + +# See rules 91 and 92 of ISO 8879 (SGML). +# Charles F. Goldfarb: "The SGML Handbook". +# Oxford: Oxford University Press. 1990. ISBN 0-19-853737-9. +# Ch. 10.3, pp 390. +pattern name => [qw (comment HTML)], + create => q {(?k:(?k:))}, + ; + + +pattern name => [qw /comment SQL MySQL/], + create => q {(?k:(?:#|-- )[^\n]*\n|} . + q {/\*(?:(?>[^*;"']+)|"[^"]*"|'[^']*'|\*(?!/))*(?:;|\*/))}, + ; + +# Anything that isn't <>[]+-., +# http://home.wxs.nl/~faase009/Ha_BF.html +pattern name => [qw /comment Brainfuck/], + create => '(?k:[^<>\[\]+\-.,]+)' + ; + +# Squeak is a variant of Smalltalk-80. +# http://www.squeak. 
+# http://mucow.com/squeak-qref.html +pattern name => [qw /comment Squeak/], + create => '(?k:(?k:")(?k:[^"]*(?:""[^"]*)*)(?k:"))' + ; + +# +# Scores of less than 5 or above 17.... +# http://www.cliff.biffle.org/esoterica/beatnik.html +@Regexp::Common::comment::scores = (1, 3, 3, 2, 1, 4, 2, 4, 1, 8, + 5, 1, 3, 1, 1, 3, 10, 1, 1, 1, + 1, 4, 4, 8, 4, 10); +pattern name => [qw /comment Beatnik/], + create => sub { + use re 'eval'; + my ($s, $x); + my $re = qr {\b([A-Za-z]+)\b + (?(?{($s, $x) = (0, lc $^N); + $s += $Regexp::Common::comment::scores + [ord (chop $x) - ord ('a')] while length $x; + $s >= 5 && $s < 18})XXX|)}x; + $re; + }, + version => 5.008, + ; + + +# http://www.cray.com/craydoc/manuals/007-3692-005/html-007-3692-005/ +# (Goto table of contents/3.3 Source Form) +# Fortran, in fixed format. Comments start with a C, c or * in the first +# column, or a ! anywhere, but the sixth column. Then end with a newline. +pattern name => [qw /comment Fortran fixed/], + create => '(?k:(?k:(?:^[Cc*]|(? [qw /comment COBOL/], + create => '(?<=^......)(?k:(?k:[*])(?k:[^\n]*)(?k:\n))', + version => '5.008', + ; + +1; +# +# Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved. +# This module is free software. It may be used, redistributed +# and/or modified under the terms of the Perl Artistic License +# (see http://www.perl.com/perl/misc/Artistic.html) +EOC +# 2}}} +$Regexp_Common_Contents{'Common/balanced'} = <<'EOB'; # {{{2 +package Regexp::Common::balanced; { + +use strict; +local $^W = 1; + +use vars qw /$VERSION/; +($VERSION) = q $Revision: 2.101 $ =~ /[\d.]+/g; + +use Regexp::Common qw /pattern clean no_defaults/; + +my %closer = ( '{'=>'}', '('=>')', '['=>']', '<'=>'>' ); +my $count = -1; +my %cache; + +sub nested { + local $^W = 1; + my ($start, $finish) = @_; + + return $Regexp::Common::balanced [$cache {$start} {$finish}] + if exists $cache {$start} {$finish}; + + $count ++; + my $r = '(??{$Regexp::Common::balanced ['. $count . 
']})'; + + my @starts = map {s/\\(.)/$1/g; $_} grep {length} + $start =~ /([^|\\]+|\\.)+/gs; + my @finishes = map {s/\\(.)/$1/g; $_} grep {length} + $finish =~ /([^|\\]+|\\.)+/gs; + + push @finishes => ($finishes [-1]) x (@starts - @finishes); + + my @re; + local $" = "|"; + foreach my $begin (@starts) { + my $end = shift @finishes; + + my $qb = quotemeta $begin; + my $qe = quotemeta $end; + my $fb = quotemeta substr $begin => 0, 1; + my $fe = quotemeta substr $end => 0, 1; + + my $tb = quotemeta substr $begin => 1; + my $te = quotemeta substr $end => 1; + + use re 'eval'; + + my $add; + if ($fb eq $fe) { + push @re => + qr /(?:$qb(?:(?>[^$fb]+)|$fb(?!$tb)(?!$te)|$r)*$qe)/; + } + else { + my @clauses = "(?>[^$fb$fe]+)"; + push @clauses => "$fb(?!$tb)" if length $tb; + push @clauses => "$fe(?!$te)" if length $te; + push @clauses => $r; + push @re => qr /(?:$qb(?:@clauses)*$qe)/; + } + } + + $cache {$start} {$finish} = $count; + $Regexp::Common::balanced [$count] = qr/@re/; +} + + +pattern name => [qw /balanced -parens=() -begin= -end=/], + create => sub { + my $flag = $_[1]; + unless (defined $flag -> {-begin} && length $flag -> {-begin} && + defined $flag -> {-end} && length $flag -> {-end}) { + my @open = grep {index ($flag->{-parens}, $_) >= 0} + ('[','(','{','<'); + my @close = map {$closer {$_}} @open; + $flag -> {-begin} = join "|" => @open; + $flag -> {-end} = join "|" => @close; + } + my $pat = nested @$flag {qw /-begin -end/}; + return exists $flag -> {-keep} ? qr /($pat)/ : $pat; + }, + version => 5.006, + ; + +} + +1; +# +# Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved. +# This module is free software. 
It may be used, redistributed +# and/or modified under the terms of the Perl Artistic License +# (see http://www.perl.com/perl/misc/Artistic.html) +EOB +# 2}}} +$Regexp_Common_Contents{'Common/delimited'} = <<'EOD'; # {{{2 +# $Id: delimited.pm,v 2.104 2005/03/16 00:22:45 abigail Exp $ + +package Regexp::Common::delimited; + +use strict; +local $^W = 1; + +use Regexp::Common qw /pattern clean no_defaults/; +use vars qw /$VERSION/; + +($VERSION) = q $Revision: 2.104 $ =~ /[\d.]+/g; + +sub gen_delimited { + + my ($dels, $escs) = @_; + # return '(?:\S*)' unless $dels =~ /\S/; + if (length $escs) { + $escs .= substr ($escs, -1) x (length ($dels) - length ($escs)); + } + my @pat = (); + my $i; + for ($i=0; $i < length $dels; $i++) { + my $del = quotemeta substr ($dels, $i, 1); + my $esc = length($escs) ? quotemeta substr ($escs, $i, 1) : ""; + if ($del eq $esc) { + push @pat, + "(?k:$del)(?k:[^$del]*(?:(?:$del$del)[^$del]*)*)(?k:$del)"; + } + elsif (length $esc) { + push @pat, + "(?k:$del)(?k:[^$esc$del]*(?:$esc.[^$esc$del]*)*)(?k:$del)"; + } + else { + push @pat, "(?k:$del)(?k:[^$del]*)(?k:$del)"; + } + } + my $pat = join '|', @pat; + return "(?k:$pat)"; +} + +sub _croak { + require Carp; + goto &Carp::croak; +} + +pattern name => [qw( delimited -delim= -esc=\\ )], + create => sub {my $flags = $_[1]; + _croak 'Must specify delimiter in $RE{delimited}' + unless length $flags->{-delim}; + return gen_delimited (@{$flags}{-delim, -esc}); + }, + ; + +pattern name => [qw( quoted -esc=\\ )], + create => sub {my $flags = $_[1]; + return gen_delimited (q{"'`}, $flags -> {-esc}); + }, + ; + + +1; +# +# Copyright (c) 2001 - 2003, Damian Conway. All Rights Reserved. +# This module is free software. 
It may be used, redistributed +# and/or modified under the terms of the Perl Artistic License +# (see http://www.perl.com/perl/misc/Artistic.html) +EOD +# 2}}} + my $problems = 0; + $HAVE_Rexexp_Common = 0; + my $dir = ""; + if ($opt_sdir) { + # write to the user-defined scratch directory + $dir = $opt_sdir; + } else { + # let File::Temp create a suitable temporary directory + $dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit + } + print "Using temp dir [$dir] to install Regexp::Common\n" if $opt_v; + my $Regexp_dir = "$dir/Regexp"; + my $Regexp_Common_dir = "$dir/Regexp/Common"; + mkdir $Regexp_dir ; + mkdir $Regexp_Common_dir; + + foreach my $module_file (keys %Regexp_Common_Contents) { + my $OUT = new IO::File "$dir/Regexp/${module_file}.pm", "w"; + if (defined $OUT) { + print $OUT $Regexp_Common_Contents{$module_file}; + $OUT->close; + } else { + warn "Failed to install Regexp::${module_file}.pm\n"; + $problems = 1; + } + } + + push @INC, $dir; + eval "use Regexp::Common qw /comment RE_comment_HTML balanced/"; + $HAVE_Rexexp_Common = 1 unless $problems; +} # 1}}} +sub Install_Algorithm_Diff { # {{{1 + # Installs Tye McQueen's Algorithm::Diff module, v1.1902, into a + # temporary directory for the duration of this run. + +my $Algorithm_Diff_Contents = <<'EOAlgDiff'; # {{{2 +package Algorithm::Diff; +# Skip to first "=head" line for documentation. +use strict; + +use integer; # see below in _replaceNextLargerWith() for mod to make + # if you don't use this +use vars qw( $VERSION @EXPORT_OK ); +$VERSION = 1.19_02; +# ^ ^^ ^^-- Incremented at will +# | \+----- Incremented for non-trivial changes to features +# \-------- Incremented for fundamental changes +require Exporter; +*import = \&Exporter::import; +@EXPORT_OK = qw( + prepare LCS LCSidx LCS_length + diff sdiff compact_diff + traverse_sequences traverse_balanced +); + +# McIlroy-Hunt diff algorithm +# Adapted from the Smalltalk code of Mario I. 
Wolczko, +# by Ned Konz, perl@bike-nomad.com +# Updates by Tye McQueen, http://perlmonks.org/?node=tye + +# Create a hash that maps each element of $aCollection to the set of +# positions it occupies in $aCollection, restricted to the elements +# within the range of indexes specified by $start and $end. +# The fourth parameter is a subroutine reference that will be called to +# generate a string to use as a key. +# Additional parameters, if any, will be passed to this subroutine. +# +# my $hashRef = _withPositionsOfInInterval( \@array, $start, $end, $keyGen ); + +sub _withPositionsOfInInterval +{ + my $aCollection = shift; # array ref + my $start = shift; + my $end = shift; + my $keyGen = shift; + my %d; + my $index; + for ( $index = $start ; $index <= $end ; $index++ ) + { + my $element = $aCollection->[$index]; + my $key = &$keyGen( $element, @_ ); + if ( exists( $d{$key} ) ) + { + unshift ( @{ $d{$key} }, $index ); + } + else + { + $d{$key} = [$index]; + } + } + return wantarray ? %d : \%d; +} + +# Find the place at which aValue would normally be inserted into the +# array. If that place is already occupied by aValue, do nothing, and +# return undef. If the place does not exist (i.e., it is off the end of +# the array), add it to the end, otherwise replace the element at that +# point with aValue. It is assumed that the array's values are numeric. +# This is where the bulk (75%) of the time is spent in this module, so +# try to make it fast! + +sub _replaceNextLargerWith +{ + my ( $array, $aValue, $high ) = @_; + $high ||= $#$array; + + # off the end? + if ( $high == -1 || $aValue > $array->[-1] ) + { + push ( @$array, $aValue ); + return $high + 1; + } + + # binary search for insertion point... 
+ my $low = 0; + my $index; + my $found; + while ( $low <= $high ) + { + $index = ( $high + $low ) / 2; + + # $index = int(( $high + $low ) / 2); # without 'use integer' + $found = $array->[$index]; + + if ( $aValue == $found ) + { + return undef; + } + elsif ( $aValue > $found ) + { + $low = $index + 1; + } + else + { + $high = $index - 1; + } + } + + # now insertion point is in $low. + $array->[$low] = $aValue; # overwrite next larger + return $low; +} + +# This method computes the longest common subsequence in $a and $b. + +# Result is array or ref, whose contents is such that +# $a->[ $i ] == $b->[ $result[ $i ] ] +# foreach $i in ( 0 .. $#result ) if $result[ $i ] is defined. + +# An additional argument may be passed; this is a hash or key generating +# function that should return a string that uniquely identifies the given +# element. It should be the case that if the key is the same, the elements +# will compare the same. If this parameter is undef or missing, the key +# will be the element as a string. + +# By default, comparisons will use "eq" and elements will be turned into keys +# using the default stringizing operator '""'. + +# Additional parameters, if any, will be passed to the key generation +# routine. + +sub _longestCommonSubsequence +{ + my $a = shift; # array ref or hash ref + my $b = shift; # array ref or hash ref + my $counting = shift; # scalar + my $keyGen = shift; # code ref + my $compare; # code ref + + if ( ref($a) eq 'HASH' ) + { # prepared hash must be in $b + my $tmp = $b; + $b = $a; + $a = $tmp; + } + + # Check for bogus (non-ref) argument values + if ( !ref($a) || !ref($b) ) + { + my @callerInfo = caller(1); + die 'error: must pass array or hash references to ' . $callerInfo[3]; + } + + # set up code refs + # Note that these are optimized. 
+ if ( !defined($keyGen) ) # optimize for strings + { + $keyGen = sub { $_[0] }; + $compare = sub { my ( $a, $b ) = @_; $a eq $b }; + } + else + { + $compare = sub { + my $a = shift; + my $b = shift; + &$keyGen( $a, @_ ) eq &$keyGen( $b, @_ ); + }; + } + + my ( $aStart, $aFinish, $matchVector ) = ( 0, $#$a, [] ); + my ( $prunedCount, $bMatches ) = ( 0, {} ); + + if ( ref($b) eq 'HASH' ) # was $bMatches prepared for us? + { + $bMatches = $b; + } + else + { + my ( $bStart, $bFinish ) = ( 0, $#$b ); + + # First we prune off any common elements at the beginning + while ( $aStart <= $aFinish + and $bStart <= $bFinish + and &$compare( $a->[$aStart], $b->[$bStart], @_ ) ) + { + $matchVector->[ $aStart++ ] = $bStart++; + $prunedCount++; + } + + # now the end + while ( $aStart <= $aFinish + and $bStart <= $bFinish + and &$compare( $a->[$aFinish], $b->[$bFinish], @_ ) ) + { + $matchVector->[ $aFinish-- ] = $bFinish--; + $prunedCount++; + } + + # Now compute the equivalence classes of positions of elements + $bMatches = + _withPositionsOfInInterval( $b, $bStart, $bFinish, $keyGen, @_ ); + } + my $thresh = []; + my $links = []; + + my ( $i, $ai, $j, $k ); + for ( $i = $aStart ; $i <= $aFinish ; $i++ ) + { + $ai = &$keyGen( $a->[$i], @_ ); + if ( exists( $bMatches->{$ai} ) ) + { + $k = 0; + for $j ( @{ $bMatches->{$ai} } ) + { + + # optimization: most of the time this will be true + if ( $k and $thresh->[$k] > $j and $thresh->[ $k - 1 ] < $j ) + { + $thresh->[$k] = $j; + } + else + { + $k = _replaceNextLargerWith( $thresh, $j, $k ); + } + + # oddly, it's faster to always test this (CPU cache?). + if ( defined($k) ) + { + $links->[$k] = + [ ( $k ? $links->[ $k - 1 ] : undef ), $i, $j ]; + } + } + } + } + + if (@$thresh) + { + return $prunedCount + @$thresh if $counting; + for ( my $link = $links->[$#$thresh] ; $link ; $link = $link->[0] ) + { + $matchVector->[ $link->[1] ] = $link->[2]; + } + } + elsif ($counting) + { + return $prunedCount; + } + + return wantarray ? 
@$matchVector : $matchVector; +} + +sub traverse_sequences +{ + my $a = shift; # array ref + my $b = shift; # array ref + my $callbacks = shift || {}; + my $keyGen = shift; + my $matchCallback = $callbacks->{'MATCH'} || sub { }; + my $discardACallback = $callbacks->{'DISCARD_A'} || sub { }; + my $finishedACallback = $callbacks->{'A_FINISHED'}; + my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { }; + my $finishedBCallback = $callbacks->{'B_FINISHED'}; + my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ ); + + # Process all the lines in @$matchVector + my $lastA = $#$a; + my $lastB = $#$b; + my $bi = 0; + my $ai; + + for ( $ai = 0 ; $ai <= $#$matchVector ; $ai++ ) + { + my $bLine = $matchVector->[$ai]; + if ( defined($bLine) ) # matched + { + &$discardBCallback( $ai, $bi++, @_ ) while $bi < $bLine; + &$matchCallback( $ai, $bi++, @_ ); + } + else + { + &$discardACallback( $ai, $bi, @_ ); + } + } + + # The last entry (if any) processed was a match. + # $ai and $bi point just past the last matching lines in their sequences. + + while ( $ai <= $lastA or $bi <= $lastB ) + { + + # last A? + if ( $ai == $lastA + 1 and $bi <= $lastB ) + { + if ( defined($finishedACallback) ) + { + &$finishedACallback( $lastA, @_ ); + $finishedACallback = undef; + } + else + { + &$discardBCallback( $ai, $bi++, @_ ) while $bi <= $lastB; + } + } + + # last B? 
+ if ( $bi == $lastB + 1 and $ai <= $lastA ) + { + if ( defined($finishedBCallback) ) + { + &$finishedBCallback( $lastB, @_ ); + $finishedBCallback = undef; + } + else + { + &$discardACallback( $ai++, $bi, @_ ) while $ai <= $lastA; + } + } + + &$discardACallback( $ai++, $bi, @_ ) if $ai <= $lastA; + &$discardBCallback( $ai, $bi++, @_ ) if $bi <= $lastB; + } + + return 1; +} + +sub traverse_balanced +{ + my $a = shift; # array ref + my $b = shift; # array ref + my $callbacks = shift || {}; + my $keyGen = shift; + my $matchCallback = $callbacks->{'MATCH'} || sub { }; + my $discardACallback = $callbacks->{'DISCARD_A'} || sub { }; + my $discardBCallback = $callbacks->{'DISCARD_B'} || sub { }; + my $changeCallback = $callbacks->{'CHANGE'}; + my $matchVector = _longestCommonSubsequence( $a, $b, 0, $keyGen, @_ ); + + # Process all the lines in match vector + my $lastA = $#$a; + my $lastB = $#$b; + my $bi = 0; + my $ai = 0; + my $ma = -1; + my $mb; + + while (1) + { + + # Find next match indices $ma and $mb + do { + $ma++; + } while( + $ma <= $#$matchVector + && !defined $matchVector->[$ma] + ); + + last if $ma > $#$matchVector; # end of matchVector? 
+ $mb = $matchVector->[$ma]; + + # Proceed with discard a/b or change events until + # next match + while ( $ai < $ma || $bi < $mb ) + { + + if ( $ai < $ma && $bi < $mb ) + { + + # Change + if ( defined $changeCallback ) + { + &$changeCallback( $ai++, $bi++, @_ ); + } + else + { + &$discardACallback( $ai++, $bi, @_ ); + &$discardBCallback( $ai, $bi++, @_ ); + } + } + elsif ( $ai < $ma ) + { + &$discardACallback( $ai++, $bi, @_ ); + } + else + { + + # $bi < $mb + &$discardBCallback( $ai, $bi++, @_ ); + } + } + + # Match + &$matchCallback( $ai++, $bi++, @_ ); + } + + while ( $ai <= $lastA || $bi <= $lastB ) + { + if ( $ai <= $lastA && $bi <= $lastB ) + { + + # Change + if ( defined $changeCallback ) + { + &$changeCallback( $ai++, $bi++, @_ ); + } + else + { + &$discardACallback( $ai++, $bi, @_ ); + &$discardBCallback( $ai, $bi++, @_ ); + } + } + elsif ( $ai <= $lastA ) + { + &$discardACallback( $ai++, $bi, @_ ); + } + else + { + + # $bi <= $lastB + &$discardBCallback( $ai, $bi++, @_ ); + } + } + + return 1; +} + +sub prepare +{ + my $a = shift; # array ref + my $keyGen = shift; # code ref + + # set up code ref + $keyGen = sub { $_[0] } unless defined($keyGen); + + return scalar _withPositionsOfInInterval( $a, 0, $#$a, $keyGen, @_ ); +} + +sub LCS +{ + my $a = shift; # array ref + my $b = shift; # array ref or hash ref + my $matchVector = _longestCommonSubsequence( $a, $b, 0, @_ ); + my @retval; + my $i; + for ( $i = 0 ; $i <= $#$matchVector ; $i++ ) + { + if ( defined( $matchVector->[$i] ) ) + { + push ( @retval, $a->[$i] ); + } + } + return wantarray ? 
@retval : \@retval; +} + +sub LCS_length +{ + my $a = shift; # array ref + my $b = shift; # array ref or hash ref + return _longestCommonSubsequence( $a, $b, 1, @_ ); +} + +sub LCSidx +{ + my $a= shift @_; + my $b= shift @_; + my $match= _longestCommonSubsequence( $a, $b, 0, @_ ); + my @am= grep defined $match->[$_], 0..$#$match; + my @bm= @{$match}[@am]; + return \@am, \@bm; +} + +sub compact_diff +{ + my $a= shift @_; + my $b= shift @_; + my( $am, $bm )= LCSidx( $a, $b, @_ ); + my @cdiff; + my( $ai, $bi )= ( 0, 0 ); + push @cdiff, $ai, $bi; + while( 1 ) { + while( @$am && $ai == $am->[0] && $bi == $bm->[0] ) { + shift @$am; + shift @$bm; + ++$ai, ++$bi; + } + push @cdiff, $ai, $bi; + last if ! @$am; + $ai = $am->[0]; + $bi = $bm->[0]; + push @cdiff, $ai, $bi; + } + push @cdiff, 0+@$a, 0+@$b + if $ai < @$a || $bi < @$b; + return wantarray ? @cdiff : \@cdiff; +} + +sub diff +{ + my $a = shift; # array ref + my $b = shift; # array ref + my $retval = []; + my $hunk = []; + my $discard = sub { + push @$hunk, [ '-', $_[0], $a->[ $_[0] ] ]; + }; + my $add = sub { + push @$hunk, [ '+', $_[1], $b->[ $_[1] ] ]; + }; + my $match = sub { + push @$retval, $hunk + if 0 < @$hunk; + $hunk = [] + }; + traverse_sequences( $a, $b, + { MATCH => $match, DISCARD_A => $discard, DISCARD_B => $add }, @_ ); + &$match(); + return wantarray ? @$retval : $retval; +} + +sub sdiff +{ + my $a = shift; # array ref + my $b = shift; # array ref + my $retval = []; + my $discard = sub { push ( @$retval, [ '-', $a->[ $_[0] ], "" ] ) }; + my $add = sub { push ( @$retval, [ '+', "", $b->[ $_[1] ] ] ) }; + my $change = sub { + push ( @$retval, [ 'c', $a->[ $_[0] ], $b->[ $_[1] ] ] ); + }; + my $match = sub { + push ( @$retval, [ 'u', $a->[ $_[0] ], $b->[ $_[1] ] ] ); + }; + traverse_balanced( + $a, + $b, + { + MATCH => $match, + DISCARD_A => $discard, + DISCARD_B => $add, + CHANGE => $change, + }, + @_ + ); + return wantarray ? 
@$retval : $retval; +} + +######################################## +my $Root= __PACKAGE__; +package Algorithm::Diff::_impl; +use strict; + +sub _Idx() { 0 } # $me->[_Idx]: Ref to array of hunk indices + # 1 # $me->[1]: Ref to first sequence + # 2 # $me->[2]: Ref to second sequence +sub _End() { 3 } # $me->[_End]: Diff between forward and reverse pos +sub _Same() { 4 } # $me->[_Same]: 1 if pos 1 contains unchanged items +sub _Base() { 5 } # $me->[_Base]: Added to range's min and max +sub _Pos() { 6 } # $me->[_Pos]: Which hunk is currently selected +sub _Off() { 7 } # $me->[_Off]: Offset into _Idx for current position +sub _Min() { -2 } # Added to _Off to get min instead of max+1 + +sub Die +{ + require Carp; + Carp::confess( @_ ); +} + +sub _ChkPos +{ + my( $me )= @_; + return if $me->[_Pos]; + my $meth= ( caller(1) )[3]; + Die( "Called $meth on 'reset' object" ); +} + +sub _ChkSeq +{ + my( $me, $seq )= @_; + return $seq + $me->[_Off] + if 1 == $seq || 2 == $seq; + my $meth= ( caller(1) )[3]; + Die( "$meth: Invalid sequence number ($seq); must be 1 or 2" ); +} + +sub getObjPkg +{ + my( $us )= @_; + return ref $us if ref $us; + return $us . 
"::_obj"; +} + +sub new +{ + my( $us, $seq1, $seq2, $opts ) = @_; + my @args; + for( $opts->{keyGen} ) { + push @args, $_ if $_; + } + for( $opts->{keyGenArgs} ) { + push @args, @$_ if $_; + } + my $cdif= Algorithm::Diff::compact_diff( $seq1, $seq2, @args ); + my $same= 1; + if( 0 == $cdif->[2] && 0 == $cdif->[3] ) { + $same= 0; + splice @$cdif, 0, 2; + } + my @obj= ( $cdif, $seq1, $seq2 ); + $obj[_End] = (1+@$cdif)/2; + $obj[_Same] = $same; + $obj[_Base] = 0; + my $me = bless \@obj, $us->getObjPkg(); + $me->Reset( 0 ); + return $me; +} + +sub Reset +{ + my( $me, $pos )= @_; + $pos= int( $pos || 0 ); + $pos += $me->[_End] + if $pos < 0; + $pos= 0 + if $pos < 0 || $me->[_End] <= $pos; + $me->[_Pos]= $pos || !1; + $me->[_Off]= 2*$pos - 1; + return $me; +} + +sub Base +{ + my( $me, $base )= @_; + my $oldBase= $me->[_Base]; + $me->[_Base]= 0+$base if defined $base; + return $oldBase; +} + +sub Copy +{ + my( $me, $pos, $base )= @_; + my @obj= @$me; + my $you= bless \@obj, ref($me); + $you->Reset( $pos ) if defined $pos; + $you->Base( $base ); + return $you; +} + +sub Next { + my( $me, $steps )= @_; + $steps= 1 if ! defined $steps; + if( $steps ) { + my $pos= $me->[_Pos]; + my $new= $pos + $steps; + $new= 0 if $pos && $new < 0; + $me->Reset( $new ) + } + return $me->[_Pos]; +} + +sub Prev { + my( $me, $steps )= @_; + $steps= 1 if ! 
defined $steps; + my $pos= $me->Next(-$steps); + $pos -= $me->[_End] if $pos; + return $pos; +} + +sub Diff { + my( $me )= @_; + $me->_ChkPos(); + return 0 if $me->[_Same] == ( 1 & $me->[_Pos] ); + my $ret= 0; + my $off= $me->[_Off]; + for my $seq ( 1, 2 ) { + $ret |= $seq + if $me->[_Idx][ $off + $seq + _Min ] + < $me->[_Idx][ $off + $seq ]; + } + return $ret; +} + +sub Min { + my( $me, $seq, $base )= @_; + $me->_ChkPos(); + my $off= $me->_ChkSeq($seq); + $base= $me->[_Base] if !defined $base; + return $base + $me->[_Idx][ $off + _Min ]; +} + +sub Max { + my( $me, $seq, $base )= @_; + $me->_ChkPos(); + my $off= $me->_ChkSeq($seq); + $base= $me->[_Base] if !defined $base; + return $base + $me->[_Idx][ $off ] -1; +} + +sub Range { + my( $me, $seq, $base )= @_; + $me->_ChkPos(); + my $off = $me->_ChkSeq($seq); + if( !wantarray ) { + return $me->[_Idx][ $off ] + - $me->[_Idx][ $off + _Min ]; + } + $base= $me->[_Base] if !defined $base; + return ( $base + $me->[_Idx][ $off + _Min ] ) + .. ( $base + $me->[_Idx][ $off ] - 1 ); +} + +sub Items { + my( $me, $seq )= @_; + $me->_ChkPos(); + my $off = $me->_ChkSeq($seq); + if( !wantarray ) { + return $me->[_Idx][ $off ] + - $me->[_Idx][ $off + _Min ]; + } + return + @{$me->[$seq]}[ + $me->[_Idx][ $off + _Min ] + .. ( $me->[_Idx][ $off ] - 1 ) + ]; +} + +sub Same { + my( $me )= @_; + $me->_ChkPos(); + return wantarray ? 
() : 0 + if $me->[_Same] != ( 1 & $me->[_Pos] ); + return $me->Items(1); +} + +my %getName; +BEGIN { + %getName= ( + same => \&Same, + diff => \&Diff, + base => \&Base, + min => \&Min, + max => \&Max, + range=> \&Range, + items=> \&Items, # same thing + ); +} + +sub Get +{ + my $me= shift @_; + $me->_ChkPos(); + my @value; + for my $arg ( @_ ) { + for my $word ( split ' ', $arg ) { + my $meth; + if( $word !~ /^(-?\d+)?([a-zA-Z]+)([12])?$/ + || not $meth= $getName{ lc $2 } + ) { + Die( $Root, ", Get: Invalid request ($word)" ); + } + my( $base, $name, $seq )= ( $1, $2, $3 ); + push @value, scalar( + 4 == length($name) + ? $meth->( $me ) + : $meth->( $me, $seq, $base ) + ); + } + } + if( wantarray ) { + return @value; + } elsif( 1 == @value ) { + return $value[0]; + } + Die( 0+@value, " values requested from ", + $Root, "'s Get in scalar context" ); +} + + +my $Obj= getObjPkg($Root); +no strict 'refs'; + +for my $meth ( qw( new getObjPkg ) ) { + *{$Root."::".$meth} = \&{$meth}; + *{$Obj ."::".$meth} = \&{$meth}; +} +for my $meth ( qw( + Next Prev Reset Copy Base Diff + Same Items Range Min Max Get + _ChkPos _ChkSeq +) ) { + *{$Obj."::".$meth} = \&{$meth}; +} + +1; +# This version released by Tye McQueen (http://perlmonks.org/?node=tye). +# +# =head1 LICENSE +# +# Parts Copyright (c) 2000-2004 Ned Konz. All rights reserved. +# Parts by Tye McQueen. +# +# This program is free software; you can redistribute it and/or modify it +# under the same terms as Perl. +# +# =head1 MAILING LIST +# +# Mark-Jason still maintains a mailing list. To join a low-volume mailing +# list for announcements related to diff and Algorithm::Diff, send an +# empty mail message to mjd-perl-diff-request@plover.com. +# =head1 CREDITS +# +# Versions through 0.59 (and much of this documentation) were written by: +# +# Mark-Jason Dominus, mjd-perl-diff@plover.com +# +# This version borrows some documentation and routine names from +# Mark-Jason's, but Diff.pm's code was completely replaced. 
+# +# This code was adapted from the Smalltalk code of Mario Wolczko +# , which is available at +# ftp://st.cs.uiuc.edu/pub/Smalltalk/MANCHESTER/manchester/4.0/diff.st +# +# C and C were written by Mike Schilli +# . +# +# The algorithm is that described in +# I, +# CACM, vol.20, no.5, pp.350-353, May 1977, with a few +# minor improvements to improve the speed. +# +# Much work was done by Ned Konz (perl@bike-nomad.com). +# +# The OO interface and some other changes are by Tye McQueen. +# +EOAlgDiff +# 2}}} + my $problems = 0; + $HAVE_Algorith_Diff = 0; + my $dir = ""; + if ($opt_sdir) { + # write to the user-defined scratch directory + $dir = $opt_sdir; + } else { + # let File::Temp create a suitable temporary directory + $dir = tempdir( CLEANUP => 1 ); # 1 = delete on exit + } + print "Using temp dir [$dir] to install Algorithm::Diff\n" if $opt_v; + my $Algorithm_dir = "$dir/Algorithm"; + my $Algorithm_Diff_dir = "$dir/Algorithm/Diff"; + mkdir $Algorithm_dir ; + mkdir $Algorithm_Diff_dir; + + my $OUT = new IO::File "$dir/Algorithm/Diff.pm", "w"; + if (defined $OUT) { + print $OUT $Algorithm_Diff_Contents; + $OUT->close; + } else { + warn "Failed to install Algorithm/Diff.pm\n"; + $problems = 1; + } + + push @INC, $dir; # between this & Regexp::Common only need to do once + eval "use Algorithm::Diff qw / sdiff /"; + $HAVE_Algorith_Diff = 1 unless $problems; +} # 1}}} +sub call_regexp_common { # {{{1 + my ($ra_lines, $language ) = @_; + print "-> call_regexp_common\n" if $opt_v > 2; + + Install_Regexp_Common() unless $HAVE_Rexexp_Common; + + my $all_lines = join("", @{$ra_lines}); + + no strict 'vars'; + # otherwise get: + # Global symbol "%RE" requires explicit package name at cloc line xx. + if ($all_lines =~ $RE{comment}{$language}) { + # Suppress "Use of uninitialized value in regexp compilation" that + # pops up when $1 is undefined--happens if there's a bug in the $RE + # This Pascal comment will trigger it: + # (* This is { another } test. 
**) + # Curiously, testing for "defined $1" breaks the substitution. + no warnings; + # remove comments + $all_lines =~ s/$1//g; + } + # a bogus use of %RE to avoid: + # Name "main::RE" used only once: possible typo at cloc line xx. + print scalar keys %RE if $opt_v < -20; +#?#print "$all_lines\n"; + print "<- call_regexp_common\n" if $opt_v > 2; + return split("\n", $all_lines); +} # 1}}} +sub plural_form { # {{{1 + # For getting the right plural form on some English nouns. + my $n = shift @_; + if ($n == 1) { return ( 1, "" ); } + else { return ($n, "s"); } +} # 1}}} +sub matlab_or_objective_C { # {{{1 + # Decide if code is MATLAB, Objective C, or MUMPS + my ($file , # in + $rh_Err , # in hash of error codes + $raa_errors , # out + $rs_language , # out + ) = @_; + + print "-> matlab_or_objective_C\n" if $opt_v > 2; + # matlab markers: + # first line starts with "function" + # some lines start with "%" + # high marks for lines that start with [ + # + # Objective C markers: + # must have at least two brace characters, { } + # has /* ... */ style comments + # some lines start with @ + # some lines start with #include + # + # MUMPS: + # has ; comment markers + # do not match: \w+\s*=\s*\w + # lines begin with \s*\.?\w+\s+\w + # high marks for lines that start with \s*K\s+ or \s*Kill\s+ + + ${$rs_language} = ""; + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + push @{$raa_errors}, [$rh_Err->{'Unable to read'} , $file]; + return; + } + + my $DEBUG = 0; + + my $matlab_points = 0; + my $objective_C_points = 0; + my $mumps_points = 0; + my $has_braces = 0; + while (<$IN>) { + ++$has_braces if m/[{}]/; + ++$mumps_points if $. 
== 1 and m{^[A-Z]}; + if (m{^\s*/\*}) { # /* + ++$objective_C_points; + --$matlab_points; +printf ".m: /* obj C=% 2d matlab=% 2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + } elsif (m{\w+\s*=\s*\[}) { # matrix assignment, very matlab + $matlab_points += 5; +printf ".m: \\w=[ obj C=% 2d matlab=% 2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + } elsif (m{^\s*\w+\s*=\s*}) { # definitely not MUMPS + --$mumps_points; +printf ".m: \\w= obj C=% 2d matlab=% 2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + } elsif (m{^\s*\.?(\w)\s+(\w)} and $1 !~ /\d/ and $2 !~ /\d/) { + ++$mumps_points; +printf ".m: \\w \\w obj C=% 2d matlab=% 2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + } elsif (m{^\s*;}) { + ++$mumps_points; +printf ".m: ; obj C=% 2d matlab=% 2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + } elsif (m{^\s*#(include|import)}) { + # Objective C without a doubt + $objective_C_points = 1; + $matlab_points = 0; +printf ".m: #includ obj C=% 2d matlab=% 2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + last; + } elsif (m{^\s*@(interface|implementation|protocol|public|protected|private|end)\s}o) { + # Objective C without a doubt + $objective_C_points = 1; + $matlab_points = 0; +printf ".m: keyword obj C=% 2d matlab=% 2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + last; + } elsif (m{^\s*\[}) { # line starts with [ -- very matlab + $matlab_points += 5; +printf ".m: [ obj C=% 2d matlab=% 2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + } elsif (m{^\sK(ill)?\s+}) { + $mumps_points += 5; +printf ".m: Kill obj C=% 2d matlab=% 2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + } elsif (m{^\s*function}) { + --$objective_C_points; + ++$matlab_points; +printf ".m: funct obj C=% 2d matlab=% 
2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + } elsif (m{^\s*%}) { # % + --$objective_C_points; + ++$matlab_points; + ++$mumps_points; +printf ".m: pcent obj C=% 2d matlab=% 2d mumps=% 2d\n", $objective_C_points, $matlab_points, $mumps_points if $DEBUG; + } + } + $IN->close; + + print "<- matlab_or_objective_C(matlab=$matlab_points, C=$objective_C_points, mumps=$mumps_points)\n" + if $opt_v > 2; + $objective_C_points = -9.9e20 unless $has_braces >= 2; + if (($matlab_points > $objective_C_points) and + ($matlab_points > $mumps_points) ) { + ${$rs_language} = "MATLAB"; + } elsif (($mumps_points > $objective_C_points) and + ($mumps_points > $matlab_points) ) { + ${$rs_language} = "MUMPS"; + } else { + ${$rs_language} = "Objective C"; + } + +} # 1}}} +sub html_colored_text { # {{{1 + # http://www.pagetutor.com/pagetutor/makapage/pics/net216-2.gif + my ($color, $text) = @_; +#?#die "html_colored_text($text)"; + if ($color =~ /^red$/i) { + $color = "#ff0000"; + } elsif ($color =~ /^green$/i) { + $color = "#00ff00"; + } elsif ($color =~ /^blue$/i) { + $color = "#0000ff"; + } elsif ($color =~ /^grey$/i) { + $color = "#cccccc"; + } +# return "" unless $text; + return '' . html_metachars($text) . ""; +} # 1}}} +sub html_metachars { # {{{1 + # Replace HTML metacharacters with their printable forms. + # Future: use HTML-Encoder-0.00_04/lib/HTML/Encoder.pm + # from Fabiano Reese Righetti's HTML::Encoder module if + # this subroutine proves to be too simplistic. 
+ my ($string, ) = shift @_; + + my @in_chars = split(//, $string); + my @out_chars = (); + foreach my $c (@in_chars) { + if ($c eq '<') { + push @out_chars, '<' + } elsif ($c eq '>') { + push @out_chars, '>' + } elsif ($c eq '&') { + push @out_chars, '&' + } else { + push @out_chars, $c; + } + } + return join "", @out_chars; +} # 1}}} +sub test_alg_diff { # {{{1 + my ($file_1 , + $file_2 ) + = @_; + my $fh_1 = new IO::File $file_1, "r"; + die "Unable to read $file_1: $!\n" unless defined $fh_1; + chomp(my @lines_1 = <$fh_1>); + $fh_1->close; + + my $fh_2 = new IO::File $file_2, "r"; + die "Unable to read $file_2: $!\n" unless defined $fh_2; + chomp(my @lines_2 = <$fh_2>); + $fh_2->close; + + my $n_no_change = 0; + my $n_modified = 0; + my $n_added = 0; + my $n_deleted = 0; + my @min_sdiff = (); +my $NN = chr(27) . "[0m"; # normal +my $BB = chr(27) . "[1m"; # bold + + my @sdiffs = sdiff( \@lines_1, \@lines_2 ); + foreach my $entry (@sdiffs) { + my ($out_1, $out_2) = ('', ''); + if ($entry->[0] eq 'u') { + ++$n_no_change; + # $out_1 = $entry->[1]; + # $out_2 = $entry->[2]; + next; + } +# push @min_sdiff, $entry; + if ($entry->[0] eq 'c') { + ++$n_modified; + ($out_1, $out_2) = diff_two_strings($entry->[1], $entry->[2]); + $out_1 =~ s/\cA(\w)/${BB}$1${NN}/g; + $out_2 =~ s/\cA(\w)/${BB}$1${NN}/g; + # $out_1 =~ s/\cA//g; + # $out_2 =~ s/\cA//g; + } elsif ($entry->[0] eq '+') { + ++$n_added; + $out_1 = $entry->[1]; + $out_2 = $entry->[2]; + } elsif ($entry->[0] eq '-') { + ++$n_deleted; + $out_1 = $entry->[1]; + $out_2 = $entry->[2]; + } elsif ($entry->[0] eq 'u') { + } else { die "unknown entry->[0]=[$entry->[0]]\n"; } + printf "%-80s | %s\n", $out_1, $out_2; + } + +# foreach my $entry (@min_sdiff) { +# printf "DIFF: %s %s %s\n", @{$entry}; +# } +} # 1}}} +sub write_comments_to_html { # {{{1 + my ($filename , # in + $rah_diff_L , # in see routine array_diff() for explanation + $rah_diff_R , # in see routine array_diff() for explanation + $rh_blank , # in location and 
counts of blank lines + ) = @_; + + print "-> write_comments_to_html($filename)\n" if $opt_v > 2; + my $file = $filename . ".html"; +#use Data::Dumper; +#print Dumper("rah_diff_L", $rah_diff_L, "rah_diff_R", $rah_diff_R); + my $OUT = new IO::File $file, "w"; + if (!defined $OUT) { + warn "Unable to write to $file\n"; + print "<- write_comments_to_html\n" if $opt_v > 2; + return; + } + + my $approx_line_count = scalar @{$rah_diff_L}; + my $n_digits = 1 + int(log($approx_line_count)/2.30258509299405); # log_10 + + my $html_out = html_header($filename); + + my $comment_line_number = 0; + for (my $i = 0; $i < scalar @{$rah_diff_R}; $i++) { + if (defined $rh_blank->{$i}) { + foreach (1..$rh_blank->{$i}) { + $html_out .= "\n"; + } + } + my $line_num = ""; + my $pre = ""; + my $post = '  '; +warn "undef rah_diff_R[$i]{type} " unless defined $rah_diff_R->[$i]{type}; + if ($rah_diff_R->[$i]{type} eq 'nonexist') { + ++$comment_line_number; + $line_num = sprintf "\  %0${n_digits}d %s", + $comment_line_number, $post; + $pre = ''; + $html_out .= $line_num; + $html_out .= $pre . + html_metachars($rah_diff_L->[$i]{char}) . + $post . "\n"; + next; + } + if ($rah_diff_R->[$i]{type} eq 'code' and + $rah_diff_R->[$i]{desc} eq 'same') { + # entire line remains as-is + $line_num = sprintf "\  %0${n_digits}d %s", + $rah_diff_R->[$i]{lnum}, $post; + $pre = ''; + $html_out .= $line_num; + $html_out .= $pre . + html_metachars($rah_diff_R->[$i]{char}) . $post; +#XX } elsif ($rah_diff_R->[$i]{type} eq 'code') { # code+comments +#XX +#XX $line_num = '' . +#XX $rah_diff_R->[$i]{lnum} . $post; +#XX $html_out .= $line_num; +#XX +#XX my @strings = @{$rah_diff_R->[$i]{char}{strings}}; +#XX my @type = @{$rah_diff_R->[$i]{char}{type}}; +#XX for (my $i = 0; $i < scalar @strings; $i++) { +#XX if ($type[$i] eq 'u') { +#XX $pre = ''; +#XX } else { +#XX $pre = ''; +#XX } +#XX $html_out .= $pre . html_metachars($strings[$i]) . 
$post; +#XX } +# print Dumper(@strings, @type); die; + + } elsif ($rah_diff_R->[$i]{type} eq 'comment') { + $line_num = '' . $comment_line_number . $post; + # entire line is a comment + $pre = ''; + $html_out .= $pre . + html_metachars($rah_diff_R->[$i]{char}) . $post; + } +#printf "%-30s %s %-30s\n", $line_1, $separator, $line_2; + $html_out .= "\n"; + } + + $html_out .= html_end(); + + my $out_file = "$filename.html"; + open OUT, ">$out_file" or die "Cannot write to $out_file $!\n"; + print OUT $html_out; + close OUT; + print "Wrote $out_file\n" unless $opt_quiet; + $OUT->close; + + print "<- write_comments_to_html\n" if $opt_v > 2; +} # 1}}} +sub array_diff { # {{{1 + my ($file , # in only used for error reporting + $ra_lines_L , # in array of lines in Left file (no blank lines) + $ra_lines_R , # in array of lines in Right file (no blank lines) + $mode , # in "comment" | "revision" + $rah_diff_L , # out + $rah_diff_R , # out + $raa_Errors , # in/out + ) = @_; + + # This routine operates in two ways: + # A. Computes diffs of the same file with and without comments. + # This is used to classify lines as code, comments, or blank. + # B. Computes diffs of two revisions of a file. This method + # requires a prior run of method A using the older version + # of the file because it needs lines to be classified. + + # $rah_diff structure: + # An array with n entries where n equals the number of lines in + # an sdiff of the two files. 
Each entry in the array describes + # the contents of the corresponding line in file Left and file Right: + # diff[]{type} = blank | code | code+comment | comment | nonexist + # {lnum} = line number within the original file (1-based) + # {desc} = same | added | removed | modified + # {char} = the input line unless {desc} = 'modified' in + # which case + # {char}{strings} = [ substrings ] + # {char}{type} = [ disposition (added, removed, etc)] + # + + print "-> array_diff()\n" if $opt_v > 2; + my $COMMENT_MODE = 0; + $COMMENT_MODE = 1 if $mode eq "comment"; + +#print "array_diff(mode=$mode)\n"; +#print Dumper("block left:" , $ra_lines_L); +#print Dumper("block right:", $ra_lines_R); + + my @sdiffs = sdiff($ra_lines_L, $ra_lines_R); +#use Data::Dumper::Simple; +#print Dumper($ra_lines_L, $ra_lines_R, @sdiffs); +#die; + + my $n_L = 0; + my $n_R = 0; + my $n_sdiff = 0; # index to $rah_diff_L, $rah_diff_R + @{$rah_diff_L} = (); + @{$rah_diff_R} = (); + foreach my $triple (@sdiffs) { + my $flag = $triple->[0]; + my $line_L = $triple->[1]; + my $line_R = $triple->[2]; + $rah_diff_L->[$n_sdiff]{char} = $line_L; + $rah_diff_R->[$n_sdiff]{char} = $line_R; + if ($flag eq 'u') { # u = unchanged + ++$n_L; + ++$n_R; + if ($COMMENT_MODE) { + # line exists in both with & without comments, must be code + $rah_diff_L->[$n_sdiff]{type} = "code"; + $rah_diff_R->[$n_sdiff]{type} = "code"; + } + $rah_diff_L->[$n_sdiff]{desc} = "same"; + $rah_diff_R->[$n_sdiff]{desc} = "same"; + $rah_diff_L->[$n_sdiff]{lnum} = $n_L; + $rah_diff_R->[$n_sdiff]{lnum} = $n_R; + } elsif ($flag eq 'c') { # c = changed +# warn "per line sdiff() commented out\n"; if (0) { + ++$n_L; + ++$n_R; + + if ($COMMENT_MODE) { + # line has text both with & without comments; + # count as code + $rah_diff_L->[$n_sdiff]{type} = "code"; + $rah_diff_R->[$n_sdiff]{type} = "code"; + } + + my @chars_L = split '', $line_L; + my @chars_R = split '', $line_R; + +#XX my @inline_sdiffs = sdiff( \@chars_L, \@chars_R ); + +#use 
Data::Dumper::Simple; +#if ($n_R == 6 or $n_R == 1 or $n_R == 2) { +#print "L=[$line_L]\n"; +#print "R=[$line_R]\n"; +#print Dumper(@chars_L, @chars_R, @inline_sdiffs); +#} +#XX my @index = (); +#XX foreach my $il_triple (@inline_sdiffs) { +#XX # make an array of u|c|+|- corresponding +#XX # to each character +#XX push @index, $il_triple->[0]; +#XX } +#XX#print Dumper(@index); die; +#XX # expect problems if arrays @index and $inline_sdiffs[1]; +#XX # (@{$inline_sdiffs->[1]} are the characters of line_L) +#XX # aren't the same length +#XX my $prev_type = $index[0]; +#XX my @strings = (); # blocks of consecutive code or comment +#XX my @type = (); # u (=code) or c (=comment) +#XX my $j_str = 0; +#XX $strings[$j_str] .= $chars_L[0]; +#XX $type[$j_str] = $prev_type; +#XX for (my $i = 1; $i < scalar @chars_L; $i++) { +#XX if ($index[$i] ne $prev_type) { +#XX ++$j_str; +#XX#print "change at j_str=$j_str type=$index[$i]\n"; +#XX $type[$j_str] = $index[$i]; +#XX $prev_type = $index[$i]; +#XX } +#XX $strings[$j_str] .= $chars_L[$i]; +#XX } +# print Dumper(@strings, @type); die; +#XX delete $rah_diff_R->[$n_sdiff]{char}; +#XX @{$rah_diff_R->[$n_sdiff]{char}{strings}} = @strings; +#XX @{$rah_diff_R->[$n_sdiff]{char}{type}} = @type; + $rah_diff_L->[$n_sdiff]{desc} = "modified"; + $rah_diff_R->[$n_sdiff]{desc} = "modified"; + $rah_diff_L->[$n_sdiff]{lnum} = $n_L; + $rah_diff_R->[$n_sdiff]{lnum} = $n_R; +#} + + } elsif ($flag eq '+') { # + = added + ++$n_R; + if ($COMMENT_MODE) { + # should never get here + @{$rah_diff_L} = (); + @{$rah_diff_R} = (); + push @{$raa_Errors}, + [ $Error_Codes{'Diff error (quoted comments?)'}, $file ]; + if ($opt_v) { + warn "array_diff: diff failure (diff says the\n"; + warn "comment-free file has added lines).\n"; + warn "$n_sdiff $line_L\n"; + } + last; + } + $rah_diff_L->[$n_sdiff]{type} = "nonexist"; + $rah_diff_L->[$n_sdiff]{desc} = "removed"; + $rah_diff_R->[$n_sdiff]{desc} = "added"; + $rah_diff_R->[$n_sdiff]{lnum} = $n_R; + } elsif ($flag 
eq '-') { # - = removed + ++$n_L; + if ($COMMENT_MODE) { + # line must be comment because blanks already gone + $rah_diff_L->[$n_sdiff]{type} = "comment"; + } + $rah_diff_R->[$n_sdiff]{type} = "nonexist"; + $rah_diff_R->[$n_sdiff]{desc} = "removed"; + $rah_diff_L->[$n_sdiff]{desc} = "added"; + $rah_diff_L->[$n_sdiff]{lnum} = $n_L; + } +#printf "%-30s %s %-30s\n", $line_L, $separator, $line_R; + ++$n_sdiff; + } +#use Data::Dumper::Simple; +#print Dumper($rah_diff_L, $rah_diff_R); + + print "<- array_diff\n" if $opt_v > 2; +} # 1}}} +sub remove_leading_dir { # {{{1 + my @filenames = @_; + # + # Input should be a list of file names + # with the same leading directory such as + # + # dir1/dir2/a.txt + # dir1/dir2/b.txt + # dir1/dir2/dir3/c.txt + # + # Output is the same list minus the common + # directory path: + # + # a.txt + # b.txt + # dir3/c.txt + # + print "-> remove_leading_dir()\n" if $opt_v > 2; + my @D = (); # a matrix: [ [ dir1, dir2 ], # dir1/dir2/a.txt + # [ dir1, dir2 ], # dir1/dir2/b.txt + # [ dir1, dir2 , dir3] ] # dir1/dir2/dir3/c.txt + if ($ON_WINDOWS) { + foreach my $F (@filenames) { + $F =~ s{\\}{/}g; + $F = ucfirst($F) if $F =~ /^\w:/; # uppercase drive letter + } + } + if (scalar @filenames == 1) { + # special case: with only one filename + # cannot determine a baseline + return ( basename $filenames[0] ); + } + foreach my $F (@filenames) { + my ($Vol, $Dir, $File) = File::Spec->splitpath($F); + my @x = File::Spec->splitdir( $Dir ); + pop @x unless $x[$#x]; # last entry usually null, remove it + if ($ON_WINDOWS) { + if (defined($Vol) and $Vol) { + # put the drive letter, eg, C:, at the front + unshift @x, uc $Vol; + } + } +#print "F=$F, Dir=$Dir x=[", join("][", @x), "]\n"; + push @D, [ @x ]; + } + + # now loop over columns until either they are all + # eliminated or a unique column is found + +#use Data::Dumper::Simple; +#print Dumper("remove_leading_dir after ", @D); + + my @common = (); # to contain the common leading directories + my $mismatch 
= 0; + while (!$mismatch) { + for (my $row = 1; $row < scalar @D; $row++) { +#print "comparing $D[$row][0] to $D[0][0]\n"; + + if (!defined $D[$row][0] or !defined $D[0][0] or + ($D[$row][0] ne $D[0][0])) { + $mismatch = 1; + last; + } + } +#print "mismatch=$mismatch\n"; + if (!$mismatch) { + push @common, $D[0][0]; + # all terms in the leading match; unshift the batch + foreach my $ra (@D) { + shift @{$ra}; + } + } + } + + push @common, " "; # so that $leading will end with "/ " + my $leading = File::Spec->catdir( @common ); + $leading =~ s{ $}{}; # now take back the bogus appended space +#print "remove_leading_dir leading=[$leading]\n"; die; + if ($ON_WINDOWS) { + $leading =~ s{\\}{/}g; + } + foreach my $F (@filenames) { + $F =~ s{^$leading}{}; + } + + print "<- remove_leading_dir()\n" if $opt_v > 2; + return @filenames; + +} # 1}}} +sub align_by_pairs { # {{{1 + my ($rh_file_list_L , # in + $rh_file_list_R , # in + $ra_added , # out + $ra_removed , # out + $ra_compare_list , # out + ) = @_; + print "-> align_by_pairs()\n" if $opt_v > 2; + @{$ra_compare_list} = (); + + my @files_L = sort keys %{$rh_file_list_L}; + my @files_R = sort keys %{$rh_file_list_R}; + return () unless @files_L or @files_R; # at least one must have stuff + if ( @files_L and !@files_R) { + # left side has stuff, right side is empty; everything deleted + @{$ra_added } = (); + @{$ra_removed } = @files_L; + @{$ra_compare_list} = (); + return; + } elsif (!@files_L and @files_R) { + # left side is empty, right side has stuff; everything added + @{$ra_added } = @files_R; + @{$ra_removed } = (); + @{$ra_compare_list} = (); + return; + } +#use Data::Dumper::Simple; +#print Dumper("align_by_pairs", @files_L, @files_R); +#die; + if (scalar @files_L == 1 and scalar @files_R == 1) { + # The easy case: compare two files. + push @{$ra_compare_list}, [ $files_L[0], $files_R[0] ]; + @{$ra_added } = (); + @{$ra_removed} = (); + return; + } + # The harder case: compare groups of files. 
This only works + # if the groups are in different directories so the first step + # is to strip the leading directory names from file lists to + # make it possible to align by file names. + my @files_L_minus_dir = remove_leading_dir(@files_L); + my @files_R_minus_dir = remove_leading_dir(@files_R); + + # Keys of the stripped_X arrays are canonical file names; + # should overlap mostly. Keys in stripped_L but not in + # stripped_R are files that have been deleted. Keys in + # stripped_R but not in stripped_L have been added. + my %stripped_L = (); + @stripped_L{ @files_L_minus_dir } = @files_L; + my %stripped_R = (); + @stripped_R{ @files_R_minus_dir } = @files_R; + + my %common = (); + foreach my $f (keys %stripped_L) { + $common{$f} = 1 if defined $stripped_R{$f}; + } + + my %deleted = (); + foreach my $f (keys %stripped_L) { + $deleted{$stripped_L{$f}} = $f unless defined $stripped_R{$f}; + } + + my %added = (); + foreach my $f (keys %stripped_R) { + $added{$stripped_R{$f}} = $f unless defined $stripped_L{$f}; + } + +#use Data::Dumper::Simple; +#print Dumper("align_by_pairs", %stripped_L, %stripped_R); +#print Dumper("align_by_pairs", %common, %added, %deleted); + + foreach my $f (keys %common) { + push @{$ra_compare_list}, [ $stripped_L{$f}, + $stripped_R{$f} ]; + } + @{$ra_added } = keys %added ; + @{$ra_removed } = keys %deleted; + + print "<- align_by_pairs()\n" if $opt_v > 2; + return; +#print Dumper("align_by_pairs", @files_L_minus_dir, @files_R_minus_dir); +#die; +} # 1}}} +sub html_header { # {{{1 + my ($title , ) = @_; + + print "-> html_header\n" if $opt_v > 2; + return +' + + + +' . +" + +$title +" . +' + + + +

+';
+    print "<- html_header\n" if $opt_v > 2;
+} # 1}}}
+sub html_end {                               # {{{1
+return 
+'
+ + +'; +} # 1}}} +sub die_unknown_lang { # {{{1 + my ($lang, $option_name) = @_; + die "Unknown language '$lang' used with $option_name option. " . + "The command\n $script --show-lang\n" . + "will print all recognized languages. Language names are " . + "case sensitive.\n" ; +} # 1}}} +sub unicode_file { # {{{1 + my $file = shift @_; + + print "-> unicode_file($file)\n" if $opt_v > 2; + return 0 if (-s $file > 2_000_000); + # don't bother trying to test binary files bigger than 2 MB + + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + return 0; + } + my @lines = <$IN>; + $IN->close; + + if (unicode_to_ascii( join('', @lines) )) { + print "<- unicode_file()\n" if $opt_v > 2; + return 1; + } else { + print "<- unicode_file()\n" if $opt_v > 2; + return 0; + } + +} # 1}}} +sub unicode_to_ascii { # {{{1 + my $string = shift @_; + + # A trivial attempt to convert Microsoft Windows style Unicode + # files into ASCII. These files exhibit the following byte + # sequence: + # byte 1: 255 + # byte 2: 254 + # byte 3: ord of ASCII character + # byte 4: 0 + # byte 3+i: ord of ASCII character + # byte 4+i: 0 + + my @ascii = (); + my $looks_like_unicode = 1; + my $length = length $string; +# print "length=$length\n"; + if ($length <= 3) { + $looks_like_unicode = 0; + return ''; + } + my @unicode = split(//, $string); + + for (my $i = 2; $i < $length; $i += 2) { +# print "examining [$unicode[$i]] ord ", ord($unicode[$i]), "\n"; + if (32 <= ord($unicode[$i]) and ord($unicode[$i]) <= 127 + or ord($unicode[$i]) == 13 + or ord($unicode[$i]) == 10 + or ord($unicode[$i]) == 9 + ) { + push @ascii, $unicode[$i]; +# print "adding [$unicode[$i]]\n"; + } else { + $looks_like_unicode = 0; + last; + } + if ($i+1 < $length) { + if (!$unicode[$i+1]) { + $looks_like_unicode = 0; + last; + } + } + } + if ($looks_like_unicode) { + return join("", @ascii); + } else { + return ''; + } +} # 1}}} +sub uncompress_archive_cmd { # {{{1 + my 
($archive_file, ) = @_; + + # Wrap $archive_file in single or double quotes in the system + # commands below to avoid filename chicanery (including + # spaces in the names). + + print "-> uncompress_archive_cmd($archive_file)\n" if $opt_v > 2; + my $extract_cmd = ""; + my $missing = ""; + if ($opt_extract_with) { + ( $extract_cmd = $opt_extract_with ) =~ s/>FILE 2; + if ($missing) { + die "Unable to expand $archive_file because external\n", + "utility '$missing' is not available.\n", + "Another possibility is to use the --extract-with option.\n"; + } else { + return $extract_cmd; + } +} +# 1}}} +sub read_list_file { # {{{1 + my ($file, ) = @_; + + print "-> read_list_file($file)\n" if $opt_v > 2; + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + next; + } + my @entry = (); + while (<$IN>) { + next if /^\s*$/ or /^\s*#/; # skip empty or commented lines + chomp; + push @entry, $_; + } + $IN->close; + + print "<- read_list_file\n" if $opt_v > 2; + return @entry; +} +# 1}}} +sub external_utility_exists { # {{{1 + my $exe = shift @_; + + my $success = 0; + if ($ON_WINDOWS) { + $success = 1 unless system $exe . ' > nul'; + } else { + $success = 1 unless system $exe . ' >& /dev/null'; + if (!$success) { + $success = 1 unless system "which" . " $exe" . ' >& /dev/null'; + } + } + + return $success; +} # 1}}} +sub write_xsl_file { # {{{1 + my $OUT = new IO::File $CLOC_XSL, "w"; + if (!defined $OUT) { + warn "Unable to write $CLOC_XSL $!\n"; + return; + } + my $XSL = # {{{2 +' + + + + + + + CLOC Results + + + +

+'; +# 2}}} + + if ($opt_by_file) { + $XSL .= #
{{{2 +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + +
FileBlankCommentCodeLanguage3rd Generation EquivalentScale
Total
+
+'; +# 2}}} + } + + if (!$opt_by_file or $opt_by_file_by_lang) { + $XSL .= #
{{{2 +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + + + + + + + +'; + $XSL .= +' + +' if $opt_3; + $XSL .= +' + +
LanguageFilesBlankCommentCodeScale3rd Generation Equivalent
Total
+'; +# 2}}} + } + + $XSL.= <<'EO_XSL'; # {{{2 + + +
+
+ +EO_XSL +# 2}}} + + my $XSL_DIFF = <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + CLOC Results + + + +

+EO_DIFF_XSL +# 2}}} + + if ($opt_by_file) { + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + + + + + + + + + + + + + + +
Same
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Modified
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Added
FileBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + +
Removed
FileBlankCommentCode
+EO_DIFF_XSL +# 2}}} + } + + if (!$opt_by_file or $opt_by_file_by_lang) { + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + + + + + + + + + + + + + + + + + + + + + + +
Same
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Modified
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Added
LanguageFilesBlankCommentCode
+ + + + + + + + + + + + + + + + + + + + + + + + +
Removed
LanguageFilesBlankCommentCode
+EO_DIFF_XSL +# 2}}} + + } + + $XSL_DIFF.= <<'EO_DIFF_XSL'; # {{{2 + + +
+
+EO_DIFF_XSL +# 2}}} + if ($opt_diff) { + print $OUT $XSL_DIFF; + } else { + print $OUT $XSL; + } + $OUT->close(); +} # 1}}} +sub normalize_file_names { # {{{1 + my (@files, ) = @_; + + # Returns a hash of file names reduced to a canonical form + # (fully qualified file names, all path separators changed to /, + # Windows file names lowercased). Hash values are the original + # file name. + + my %normalized = (); + foreach my $F (@files) { + my $F_norm = $F; + if ($ON_WINDOWS) { + $F_norm = lc $F_norm; # for case insensitive file name comparisons + $F_norm =~ s{\\}{/}g; # Windows directory separators to Unix + $F_norm =~ s{^\./}{}g; # remove leading ./ + if (($F_norm !~ m{^/}) and ($F_norm !~ m{^\w:/})) { + # looks like a relative path; prefix with cwd + $F_norm = lc "$cwd/$F_norm"; + } + } else { + $F_norm =~ s{^\./}{}g; # remove leading ./ + if ($F_norm !~ m{^/}) { + # looks like a relative path; prefix with cwd + $F_norm = lc "$cwd/$F_norm"; + } + } + $normalized{ $F_norm } = $F; + } + return %normalized; +} # 1}}} +sub combine_diffs { # {{{1 + # subroutine by Andy (awalshe@sf.net) + # https://sourceforge.net/tracker/?func=detail&aid=3261017&group_id=174787&atid=870625 + my ($ra_files) = @_; + + my $res = "$URL v $VERSION\n"; + my $dl = '-'; + my $width = 79; + # columns are in this order + my @cols = ('files', 'blank', 'comment', 'code'); + my %HoH = (); + + foreach my $file (@{$ra_files}) { + my $IN = new IO::File $file, "r"; + if (!defined $IN) { + warn "Unable to read $file; ignoring.\n"; + next; + } + + my $sec; + while (<$IN>) { + next if /^(http|Language|-----)/; + if (/^[A-Z][a-z]*/) { # section title + $sec = $_; + chomp($sec); + $HoH{$sec} = () if ! exists $HoH{$sec}; + next; + } + + if (/^\s(same|modified|added|removed)/) { # calculated totals row + my @ar = grep { $_ ne '' } split(/ /, $_); + chomp(@ar); + my $ttl = shift @ar; + my $i = 0; + foreach(@ar) { + my $t = "$ttl$dl$cols[$i]"; + $HoH{$sec}{$t} = 0 if ! 
exists $HoH{$sec}{$t}; + $HoH{$sec}{$t} += $_; + $i++; + } + } + } + $IN->close; + } + + # rows are in this order + my @rows = ('same', 'modified', 'added', 'removed'); + + $res .= sprintf("%s\n", "-" x $width); + $res .= sprintf("%-19s %14s %14s %14s %14s\n", 'Language', + $cols[0], $cols[1], $cols[2], $cols[3]); + $res .= sprintf("%s\n", "-" x $width); + + for my $sec ( keys %HoH ) { + next if $sec =~ /SUM:/; + $res .= "$sec\n"; + foreach (@rows) { + $res .= sprintf(" %-18s %14s %14s %14s %14s\n", + $_, $HoH{$sec}{"$_$dl$cols[0]"}, + $HoH{$sec}{"$_$dl$cols[1]"}, + $HoH{$sec}{"$_$dl$cols[2]"}, + $HoH{$sec}{"$_$dl$cols[3]"}); + } + } + $res .= sprintf("%s\n", "-" x $width); + my $sec = 'SUM:'; + $res .= "$sec\n"; + foreach (@rows) { + $res .= sprintf(" %-18s %14s %14s %14s %14s\n", + $_, $HoH{$sec}{"$_$dl$cols[0]"}, + $HoH{$sec}{"$_$dl$cols[1]"}, + $HoH{$sec}{"$_$dl$cols[2]"}, + $HoH{$sec}{"$_$dl$cols[3]"}); + } + $res .= sprintf("%s\n", "-" x $width); + + return $res; +} # 1}}} +# subroutines copied from SLOCCount +my %lex_files = (); # really_is_lex() +my %expect_files = (); # really_is_expect() +my %pascal_files = (); # really_is_pascal(), really_is_incpascal() +my %php_files = (); # really_is_php() +sub really_is_lex { # {{{1 +# Given filename, returns TRUE if its contents really is lex. +# lex file must have "%%", "%{", and "%}". +# In theory, a lex file doesn't need "%{" and "%}", but in practice +# they all have them, and requiring them avoid mislabeling a +# non-lexfile as a lex file. + + my $filename = shift; + chomp($filename); + + my $is_lex = 0; # Value to determine. 
+ my $percent_percent = 0; + my $percent_opencurly = 0; + my $percent_closecurly = 0; + + # Return cached result, if available: + if ($lex_files{$filename}) { return $lex_files{$filename};} + + open(LEX_FILE, "<$filename") || + die "Can't open $filename to determine if it's lex.\n"; + while() { + $percent_percent++ if (m/^\s*\%\%/); + $percent_opencurly++ if (m/^\s*\%\{/); + $percent_closecurly++ if (m/^\s*\%\}/); + } + close(LEX_FILE); + + if ($percent_percent && $percent_opencurly && $percent_closecurly) + {$is_lex = 1;} + + $lex_files{$filename} = $is_lex; # Store result in cache. + + return $is_lex; +} # 1}}} +sub really_is_expect { # {{{1 +# Given filename, returns TRUE if its contents really are Expect. +# Many "exp" files (such as in Apache and Mesa) are just "export" data, +# summarizing something else # (e.g., its interface). +# Sometimes (like in RPM) it's just misc. data. +# Thus, we need to look at the file to determine +# if it's really an "expect" file. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Expect _IF_ it: +# 1. has "load_lib" command and either "#" comments or {}. +# 2. {, }, and one of: proc, if, [...], expect + + my $is_expect = 0; # Value to determine. + + my $begin_brace = 0; # Lines that begin with curly braces. + my $end_brace = 0; # Lines that begin with curly braces. + my $load_lib = 0; # Lines with the Load_lib command. 
+ my $found_proc = 0; + my $found_if = 0; + my $found_brackets = 0; + my $found_expect = 0; + my $found_pound = 0; + + # Return cached result, if available: + if ($expect_files{$filename}) { return expect_files{$filename};} + + open(EXPECT_FILE, "<$filename") || + die "Can't open $filename to determine if it's expect.\n"; + while() { + + if (m/#/) {$found_pound++; s/#.*//;} + if (m/^\s*\{/) { $begin_brace++;} + if (m/\{\s*$/) { $begin_brace++;} + if (m/^\s*\}/) { $end_brace++;} + if (m/\};?\s*$/) { $end_brace++;} + if (m/^\s*load_lib\s+\S/) { $load_lib++;} + if (m/^\s*proc\s/) { $found_proc++;} + if (m/^\s*if\s/) { $found_if++;} + if (m/\[.*\]/) { $found_brackets++;} + if (m/^\s*expect\s/) { $found_expect++;} + } + close(EXPECT_FILE); + + if ($load_lib && ($found_pound || ($begin_brace && $end_brace))) + {$is_expect = 1;} + if ( $begin_brace && $end_brace && + ($found_proc || $found_if || $found_brackets || $found_expect)) + {$is_expect = 1;} + + $expect_files{$filename} = $is_expect; # Store result in cache. + + return $is_expect; +} # 1}}} +sub really_is_pascal { # {{{1 +# Given filename, returns TRUE if its contents really are Pascal. + +# This isn't as obvious as it seems. +# Many ".p" files are Perl files +# (such as /usr/src/redhat/BUILD/ispell-3.1/dicts/czech/glob.p), +# others are C extractions +# (such as /usr/src/redhat/BUILD/linux/include/linux/umsdos_fs.p +# and some files in linuxconf). +# However, test files in "p2c" really are Pascal, for example. + +# Note that /usr/src/redhat/BUILD/ucd-snmp-4.1.1/ov/bitmaps/UCD.20.p +# is actually C code. The heuristics determine that they're not Pascal, +# but because it ends in ".p" it's not counted as C code either. +# I believe this is actually correct behavior, because frankly it +# looks like it's automatically generated (it's a bitmap expressed as code). +# Rather than guess otherwise, we don't include it in a list of +# source files. 
Let's face it, someone who creates C files ending in ".p" +# and expects them to be counted by default as C files in SLOCCount needs +# their head examined. I suggest examining their head +# with a sucker rod (see syslogd(8) for more on sucker rods). + +# This heuristic counts as Pascal such files such as: +# /usr/src/redhat/BUILD/teTeX-1.0/texk/web2c/tangleboot.p +# Which is hand-generated. We don't count woven documents now anyway, +# so this is justifiable. + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it's Pascal _IF_ it has all of the following +# (ignoring {...} and (*...*) comments): +# 1. "^..program NAME" or "^..unit NAME", +# 2. "procedure", "function", "^..interface", or "^..implementation", +# 3. a "begin", and +# 4. it ends with "end.", +# +# Or it has all of the following: +# 1. "^..module NAME" and +# 2. it ends with "end.". +# +# Or it has all of the following: +# 1. "^..program NAME", +# 2. a "begin", and +# 3. it ends with "end.". +# +# The "end." requirements in particular filter out non-Pascal. +# +# Note (jgb): this does not detect Pascal main files in fpc, like +# fpc-1.0.4/api/test/testterminfo.pas, which does not have "program" in +# it + + my $is_pascal = 0; # Value to determine. + + my $has_program = 0; + my $has_unit = 0; + my $has_module = 0; + my $has_procedure_or_function = 0; + my $found_begin = 0; + my $found_terminating_end = 0; + my $has_begin = 0; + + # Return cached result, if available: + if ($pascal_files{$filename}) { return pascal_files{$filename};} + + open(PASCAL_FILE, "<$filename") || + die "Can't open $filename to determine if it's pascal.\n"; + while() { + s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective. + s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective. 
+ if (m/\bprogram\s+[A-Za-z]/i) {$has_program=1;} + if (m/\bunit\s+[A-Za-z]/i) {$has_unit=1;} + if (m/\bmodule\s+[A-Za-z]/i) {$has_module=1;} + if (m/\bprocedure\b/i) { $has_procedure_or_function = 1; } + if (m/\bfunction\b/i) { $has_procedure_or_function = 1; } + if (m/^\s*interface\s+/i) { $has_procedure_or_function = 1; } + if (m/^\s*implementation\s+/i) { $has_procedure_or_function = 1; } + if (m/\bbegin\b/i) { $has_begin = 1; } + # Originally I said: + # "This heuristic fails if there are multi-line comments after + # "end."; I haven't seen that in real Pascal programs:" + # But jgb found there are a good quantity of them in Debian, specially in + # fpc (at the end of a lot of files there is a multiline comment + # with the changelog for the file). + # Therefore, assume Pascal if "end." appears anywhere in the file. + if (m/end\.\s*$/i) {$found_terminating_end = 1;} +# elsif (m/\S/) {$found_terminating_end = 0;} + } + close(PASCAL_FILE); + + # Okay, we've examined the entire file looking for clues; + # let's use those clues to determine if it's really Pascal: + + if ( ( ($has_unit || $has_program) && $has_procedure_or_function && + $has_begin && $found_terminating_end ) || + ( $has_module && $found_terminating_end ) || + ( $has_program && $has_begin && $found_terminating_end ) ) + {$is_pascal = 1;} + + $pascal_files{$filename} = $is_pascal; # Store result in cache. + + return $is_pascal; +} # 1}}} +sub really_is_incpascal { # {{{1 +# Given filename, returns TRUE if its contents really are Pascal. +# For .inc files (mainly seen in fpc) + + my $filename = shift; + chomp($filename); + +# The heuristic is as follows: it is Pacal if any of the following: +# 1. really_is_pascal returns true +# 2. Any usual reserverd word is found (program, unit, const, begin...) + + # If the general routine for Pascal files works, we have it + if (&really_is_pascal ($filename)) { + $pascal_files{$filename} = 1; + return 1; + } + + my $is_pascal = 0; # Value to determine. 
+ my $found_begin = 0; + + open(PASCAL_FILE, "<$filename") || + die "Can't open $filename to determine if it's pascal.\n"; + while() { + s/\{.*?\}//g; # Ignore {...} comments on this line; imperfect, but effective. + s/\(\*.*?\*\)//g; # Ignore (*...*) comments on this line; imperfect, but effective. + if (m/\bprogram\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bunit\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bmodule\s+[A-Za-z]/i) {$is_pascal=1;} + if (m/\bprocedure\b/i) {$is_pascal = 1; } + if (m/\bfunction\b/i) {$is_pascal = 1; } + if (m/^\s*interface\s+/i) {$is_pascal = 1; } + if (m/^\s*implementation\s+/i) {$is_pascal = 1; } + if (m/\bconstant\s+/i) {$is_pascal=1;} + if (m/\bbegin\b/i) { $found_begin = 1; } + if ((m/end\.\s*$/i) && ($found_begin = 1)) {$is_pascal = 1;} + if ($is_pascal) { + last; + } + } + + close(PASCAL_FILE); + $pascal_files{$filename} = $is_pascal; # Store result in cache. + return $is_pascal; +} # 1}}} +sub really_is_php { # {{{1 +# Given filename, returns TRUE if its contents really is php. + + my $filename = shift; + chomp($filename); + + my $is_php = 0; # Value to determine. + # Need to find a matching pair of surrounds, with ending after beginning: + my $normal_surround = 0; # + my $script_surround = 0; # ; bit 0 =