Skip to content

Commit 7f72ab9

Browse files
committed
Improve copyright detection
Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent be5cdba commit 7f72ab9

27 files changed

+217
-0
lines changed

src/cluecode/copyrights.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,8 @@ def build_detection_from_node(
581581
'(20[0-3][0-9][\\.,\\-])+20[0-3][0-9]' # 2001-2012
582582
'|'
583583
'(20[0-3][0-9][\\.,\\-])+20[0-3]x' # 2001-201x
584+
'|'
585+
'(20[0-3][0-9][\\.,\\-])+20[0-3][0-9]a' # 2001-2012a
584586
')')
585587

586588
_PUNCT = (
@@ -1257,6 +1259,7 @@ def build_detection_from_node(
12571259
(r'^Activation\.?$', 'NN'),
12581260
(r'^Act[\.,]?$', 'NN'),
12591261
(r'^Added$', 'NN'),
1262+
(r'^added$', 'JUNK'),
12601263
(r'^As$', 'NN'),
12611264
(r'^I$', 'NN'),
12621265
(r'^Additional$', 'NN'),
@@ -1350,6 +1353,11 @@ def build_detection_from_node(
13501353
(r'^Entity$', 'NN'),
13511354
(r'^Example', 'NN'),
13521355
(r'^Except', 'NN'),
1356+
(r'^Fragments$', 'NN'),
1357+
(r'^With$', 'NN'),
1358+
(r'^Tick$', 'NN'),
1359+
(r'^Dynamic$', 'NN'),
1360+
13531361
(r'^When$', 'NN'),
13541362
# (r'^Owner$', 'NN'),
13551363
(r'^Specifications?$', 'NN'),
@@ -1567,6 +1575,14 @@ def build_detection_from_node(
15671575
(r'^They$', 'JUNK'),
15681576
(r'^Branched$', 'NN'),
15691577

1578+
(r'^Improved$', 'NN'),
1579+
(r'^Designed$', 'NN'),
1580+
(r'^Organised$', 'NN'),
1581+
(r'^Re-organised$', 'NN'),
1582+
(r'^Swap$', 'NN'),
1583+
(r'^Adapted$', 'JUNK'),
1584+
(r'^Thumb$', 'NN'),
1585+
15701586
# alone this is not enough for an NNP
15711587
(r'^Free$', 'NN'),
15721588

@@ -1620,6 +1636,7 @@ def build_detection_from_node(
16201636
(r'^Unlike$', 'NN'),
16211637
(r'^Compression$', 'NN'),
16221638
(r'^Letter$', 'NN'),
1639+
(r'^Moved$', 'NN'),
16231640

16241641
# dual caps that are not NNP
16251642
(r'^Make[A-Z]', 'JUNK'),
@@ -1799,6 +1816,9 @@ def build_detection_from_node(
17991816
(r'^(S\.?A\.?S?|Sas|sas|A\/S|AG,?|AB|Labs?|[Cc][Oo]|Research|Center|INRIA|Societe|KG)[,\.]?$', 'COMP'),
18001817
# French SARL
18011818
(r'^(SARL|S\.A\.R\.L\.)[\.,\)]*$', 'COMP'),
1819+
# More company suffixes: a.s. in Czechia and others
1820+
(r'^(a\.s\.|S\.r\.l\.?)$', 'COMP'),
1821+
(r'^Vertriebsges\.m\.b\.H\.?,?$', 'COMP'),
18021822

18031823
# company suffix : AS: this is frequent beyond Norway.
18041824
(r'^AS', 'CAPS'),
@@ -2070,6 +2090,9 @@ def build_detection_from_node(
20702090
# proper noun with apostrophe ': d'Itri
20712091
(r"^[a-z]'[A-Z]?[a-z]+[,\.]?$", 'NNP'),
20722092

2093+
# exceptions to all CAPS words
2094+
(r'^[A-Z]{3,4}[0-9]{4},?$', 'NN'),
2095+
20732096
# all CAPS (or digits) word, at least 1 char long such as MIT, including an optional trailing comma
20742097
(r'^[A-Z0-9]+,?$', 'CAPS'),
20752098

@@ -2680,6 +2703,9 @@ def build_detection_from_node(
26802703
# Copyright (c) Ian F. Darwin 1986, 1987, 1989, 1990, 1991, 1992, 1994, 1995.
26812704
COPYRIGHT: {<COPY>+ <NAME|NAME-EMAIL|NAME-YEAR>+ <YR-RANGE>*} #157999
26822705
2706+
# Copyright (c) 2014 Czech Technical University in Prague
2707+
COPYRIGHT: {<COPYRIGHT> <NN> <UNI> <NAME>} #157999-name
2708+
26832709
COPYRIGHT: {<COPY>+ <CAPS|NNP>+ <CC> <NN> <COPY> <YR-RANGE>?} #1590
26842710
26852711
# // (c) (C) → ©
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
* Copyright 2010-2011a Analog Devices Inc.
2+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- authors
5+
copyrights:
6+
- Copyright 2010-2011a Analog Devices Inc.
7+
holders:
8+
- Analog Devices Inc.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Copyright (C) 1998 Frederic Rible F1OAT (frible@teaser.fr)
2+
Adapted from baycom.c driver written by Thomas Sailer (sailer@ife.ee.ethz.ch)
3+
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- authors
5+
copyrights:
6+
- Copyright (c) 1998 Frederic Rible F1OAT (frible@teaser.fr)
7+
holders:
8+
- Frederic Rible F1OAT
9+
authors:
10+
- Thomas Sailer (sailer@ife.ee.ethz.ch)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
(c) 2004 MontaVista Software, Inc.
2+
* - Adapted from gdb/sim/arm/thumbemu.c
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- authors
5+
copyrights:
6+
- (c) 2004 MontaVista Software, Inc.
7+
holders:
8+
- MontaVista Software, Inc.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.:
2+
* - added disk storage for bitmap
3+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
what:
2+
- copyrights
3+
- holders
4+
- authors
5+
copyrights:
6+
- Copyright (c) 2003-2004, Paul Clements, SteelEye Technology, Inc.
7+
holders:
8+
- Paul Clements, SteelEye Technology, Inc.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
* Copyright (C) 2012 AK signal Brno a.s.
2+
* 2012 Jiri Prchal <jiri.prchal@aksignal.cz>
3+

0 commit comments

Comments
 (0)