Skip to content

Commit 0af2fe4

Browse files
committed
Improve copyright detection
Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent fc9cad2 commit 0af2fe4

35 files changed

+393
-12
lines changed

src/cluecode/copyrights.py

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,12 @@ def build_detection_from_node(
806806
# verbatim star
807807
(r'^\*$', 'JUNK'),
808808

809+
# misc company names exception to next rule
810+
(r'^TinCanTools$', 'NNP'),
811+
(r'^SoftwareBitMaker$', 'NNP'),
812+
(r'^NetCommWireless$', 'NNP'),
813+
814+
# Repeated CamelCasedWords
809815
(r'^([A-Z][a-z]+){3,}$', 'JUNK'),
810816

811817
############################################################################
@@ -1079,7 +1085,7 @@ def build_detection_from_node(
10791085
(r'^whom$', 'JUNK'),
10801086
(r'^However,?$', 'JUNK'),
10811087
(r'^[Cc]ollectively$', 'JUNK'),
1082-
(r'^following$', 'JUNK'),
1088+
(r'^following$', 'FOLLOWING'),
10831089
(r'^[Cc]onfig$', 'JUNK'),
10841090
(r'^file\.$', 'JUNK'),
10851091

@@ -1184,7 +1190,7 @@ def build_detection_from_node(
11841190
(r'^[a-z]{3,10}[A-Z][a-z]{3,10}$', 'JUNK'),
11851191

11861192
(r'^\$?Guid$', 'JUNK'),
1187-
(r'^Small$', 'NN'),
1193+
#(r'^Small$', 'NN'),
11881194
(r'^implementing$', 'JUNK'),
11891195
(r'^Unlike$', 'JUNK'),
11901196
(r'^using$', 'JUNK'),
@@ -1206,6 +1212,11 @@ def build_detection_from_node(
12061212
# single period
12071213
(r"^\.$", 'JUNK'),
12081214

1215+
# exception to the next rule
1216+
1217+
# by PaX Team
1218+
(r"PaX$", 'NN'),
1219+
12091220
# short mixed caps with trailing cap: ZoY
12101221
(r"[A-Z][a-z][A-Z]$", 'JUNK'),
12111222

@@ -1405,6 +1416,7 @@ def build_detection_from_node(
14051416
(r'^STA$', 'NN'),
14061417
(r'^Page$', 'NN'),
14071418
(r'^Todo/Under$', 'JUNK'),
1419+
(r'^Under$', 'NN'),
14081420

14091421
(r'^Interrupt$', 'NN'),
14101422
(r'^cleanups?$', 'JUNK'),
@@ -1668,6 +1680,8 @@ def build_detection_from_node(
16681680
(r'^([Mm]onday|[Tt]uesday|[Ww]ednesday|[Tt]hursday|[Ff]riday|[Ss]aturday|[Ss]unday),?$', 'DAY'),
16691681
(r'^(Mon|Tue|Wed|Thu|Fri|Sat|Sun|May),?$', 'NN'),
16701682

1683+
(r'^[Dd]ebugging$', 'JUNK'),
1684+
16711685
# misc words that are not NNs
16721686
# lowercase verbs ending in "ing"
16731687
(r'^[a-z]+ing$', 'NN'),
@@ -1700,6 +1714,9 @@ def build_detection_from_node(
17001714
(r'^Moved$', 'NN'),
17011715
(r'^Phone$', 'NN'),
17021716

1717+
(r'^Inputs?$', 'NN'),
1718+
1719+
17031720
# dual caps that are not NNP
17041721
(r'^Make[A-Z]', 'JUNK'),
17051722
(r'^Create[A-Z]', 'JUNK'),
@@ -2069,6 +2086,7 @@ def build_detection_from_node(
20692086
# and Spanish/French Da Siva and De Gaulle
20702087
(r'^(([Vv][ao]n)|[Dd][aeu])$', 'VAN'),
20712088

2089+
(r'^aan$', 'OF'),
20722090
(r'^van$', 'VAN'),
20732091
(r'^Van$', 'VAN'),
20742092
(r'^von$', 'VAN'),
@@ -2289,6 +2307,9 @@ def build_detection_from_node(
22892307
# some punctuation combos
22902308
(r'^(?:=>|->|<-|<=)$', 'JUNK'),
22912309

2310+
(r'^semiconductors?[\.,]?$', 'NNP'),
2311+
2312+
22922313
############################################################################
22932314
# catch all other as Nouns
22942315
############################################################################
@@ -2320,6 +2341,10 @@ def build_detection_from_node(
23202341
23212342
CD: {<BARE-YR>} #bareyear
23222343
2344+
# 5 Jan 2003
2345+
YR-RANGE: {<CD> <NNP> <YR-RANGE>} #72.3
2346+
2347+
23232348
#######################################
23242349
# All/No/Some Rights Reserved
23252350
#######################################
@@ -2344,6 +2369,9 @@ def build_detection_from_node(
23442369
# foo@bar.com or baz@bar.com
23452370
EMAIL: {<EMAIL> <NN> <EMAIL>} # email or email
23462371
2372+
# <srinivasa.deevi at conexant dot com>
2373+
EMAIL: {<EMAIL_START> <CC> <NN> <DOT> <NN> } #email with brackets
2374+
23472375
#######################################
23482376
# NAMES and COMPANIES
23492377
#######################################
@@ -2559,18 +2587,21 @@ def build_detection_from_node(
25592587
NAME: {<NAME|NAME-EMAIL>+ <OF> <NNP> <OF> <NN>? <COMPANY>} #550
25602588
NAME: {<NAME|NAME-EMAIL>+ <CC|OF>? <NAME|NAME-EMAIL|COMPANY>} #560
25612589
2562-
NAME: {<NNP><NNP>} #5611
2590+
NAME: {<NNP><NNP>} #561
25632591
25642592
# strip Software from Copyright (c) Ian Darwin 1995. Software
2565-
NAME-YEAR: {<NAME>+ <YR-RANGE>} #5611
2593+
NAME-YEAR: {<NAME>+ <YR-RANGE>} #561.1
25662594
25672595
# Copyright 2018, OpenCensus Authors
2568-
COPYRIGHT: {<COPY>+ <YR-RANGE> <NNP> <AUTHS>} #1579991
2596+
COPYRIGHT: {<COPY>+ <YR-RANGE> <NNP> <AUTHS>} #561.2
2597+
2598+
# Tom aan de Wiel
2599+
NAME: {<NNP> <OF> <VAN> <NNP> } # 561.3
25692600
2570-
NAME-YEAR: {<YR-RANGE> <NNP>+ <CAPS>? <LINUX>?} #5612
2601+
NAME-YEAR: {<YR-RANGE> <NNP>+ <CAPS>? <LINUX>?} #562
25712602
25722603
#Academy of Motion Picture Arts and Sciences
2573-
NAME: {<NAME> <CC> <NNP>} #561
2604+
NAME: {<NAME> <CC> <NNP>} #563
25742605
25752606
# Adam Weinberger and the GNOME Foundation
25762607
ANDCO: {<CC> <NN> <COMPANY>} #565
@@ -2582,6 +2613,8 @@ def build_detection_from_node(
25822613
25832614
URL: {<PARENS> <URL> <PARENS>} #5700
25842615
2616+
NAME-YEAR: {<NAME-YEAR> <CD> <NNP>} #5700.1
2617+
25852618
#also accept trailing email and URLs
25862619
# and "VAN" e.g. Du: Copyright (c) 2008 Alek Du <alek.du@intel.com>
25872620
NAME-YEAR: {<NAME-YEAR> <VAN>? <EMAIL>?<URL>?} #5701
@@ -2984,7 +3017,11 @@ def build_detection_from_node(
29843017
29853018
# Russ Dill <Russ.Dill@asu.edu> 2001-2003
29863019
# Rewrited by Vladimir Oleynik <dzo@simtreas.ru> (C) 2003
2987-
COPYRIGHT: {<NAME-EMAIL> <YR-RANGE> <AUTH2> <BY> <NAME-EMAIL> <COPY> <YR-RANGE>} #22793.5
3020+
COPYRIGHT: {<NAME-EMAIL> <YR-RANGE> <AUTH2> <BY> <NAME-EMAIL> <COPY> <YR-RANGE>} #2280-2
3021+
3022+
# Copyright (C) 2018
3023+
# Author: Jeff LaBundy <jeff@labundy.com>
3024+
COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <AUTH> <NAME-EMAIL>} #2280-3
29883025
29893026
COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <PN>*} #2280
29903027
@@ -3259,6 +3296,9 @@ def build_detection_from_node(
32593296
# copyrighted by the Open Source Vulnerability Database (http://osvdb.org)
32603297
COPYRIGHT: {<COPY> <BY> <NN|NNP>{3} <NAME>} #83002.1
32613298
3299+
# (C) by the respective authors,
3300+
COPYRIGHT: {<COPY> <BY> <NN> <NN> <AUTHDOT>} #83002.2
3301+
32623302
# weird //opylefted by <-Harvie 2oo7
32633303
COPYRIGHT: {<COPY> <BY> <NN> <NN> <MAINT>?} #83003
32643304
@@ -3302,6 +3342,14 @@ def build_detection_from_node(
33023342
# Gracenote Software, copyright © 2000-2008 Gracenote.
33033343
COPYRIGHT: {<COMPANY> <COPY>{1,2} <NAME-YEAR>} #157999.12
33043344
3345+
#Copyright (C) 2012-2016 by the following authors:
3346+
#- Wladimir J. van der Laan <laanwj@gmail.com>
3347+
3348+
NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
3349+
NAME-EMAIL: {<DASH> <NAME-EMAIL> <NN>?} #157999.14
3350+
COPYRIGHT: {<COPYRIGHT2> <FOLLOWING> <AUTHS> <NAME-EMAIL>+ } #157999.15
3351+
3352+
33053353
#######################################
33063354
# Copyright is held by ....
33073355
#######################################
@@ -3804,6 +3852,8 @@ def is_junk_copyright(s, patterns=COPYRIGHTS_JUNK_PATTERN_MATCHERS):
38043852
'$',
38053853
'current.year',
38063854
"©",
3855+
'author',
3856+
'authors',
38073857
])
38083858
))
38093859

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
* Copyright (C) 2016-2018
2+
* Author: Matt Ranostay <matt.ranostay@konsulko.com>
3+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- authors
5+
copyrights:
6+
- Copyright (c) 2016-2018 Author Matt Ranostay <matt.ranostay@konsulko.com>
7+
holders:
8+
- Matt Ranostay
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Copyright 2010 Ben Dooks <ben-linux <at> fluff.org>
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- authors
5+
copyrights:
6+
- Copyright 2010 Ben Dooks ben-linux <at> fluff.org
7+
holders:
8+
- Ben Dooks ben-linux <at> fluff.org
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Copyright (C) 2008 <srinivasa.deevi at conexant dot com>
2+
Based on em28xx driver
3+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- authors
5+
copyrights:
6+
- Copyright (c) 2008 srinivasa.deevi at conexant dot com
7+
holders:
8+
- srinivasa.deevi at conexant dot com
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
* started by Ingo Molnar, Copyright (C) 2001
2+
* debugging by David Rientjes, Copyright (C) 2015
3+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- authors
5+
copyrights:
6+
- Ingo Molnar, Copyright (c) 2001
7+
- David Rientjes, Copyright (c) 2015
8+
holders:
9+
- Ingo Molnar
10+
- David Rientjes
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
Copyright (C) 2012-2016 by the following authors:
2+
- Wladimir J. van der Laan <laanwj@gmail.com>
3+
- Christian Gmeiner <christian.gmeiner@gmail.com>
4+
- Lucas Stach <l.stach@pengutronix.de>
5+
- Russell King <rmk@arm.linux.org.uk>
6+

0 commit comments

Comments
 (0)