Skip to content

Commit 6903f6f

Browse files
committed
Improve copyright detection even more
Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
1 parent 9139681 commit 6903f6f

38 files changed

+182
-19
lines changed

src/cluecode/copyrights.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,7 @@ def build_detection_from_node(
791791
(r'^A11yance', 'NNP'),
792792
(r'^Fu$', 'NNP'),
793793
(r'^W3C\(r\)$', 'COMP'),
794+
(r'^TeX$', 'NNP'),
794795

795796
# Three or more AsCamelCase GetQueueReference, with some exceptions
796797
(r'^(?:OpenStreetMap|AliasDotCom|AllThingsTalk).?$', 'NAME'),
@@ -1633,6 +1634,9 @@ def build_detection_from_node(
16331634
(r'^Adapted$', 'JUNK'),
16341635
(r'^Thumb$', 'NN'),
16351636

1637+
# SEEN IN Copyright (c) 1997 Dan error_act (dmalek@jlc.net)
1638+
(r'^error_act$', 'NN'),
1639+
16361640
# alone this is not enough for an NNP
16371641
(r'^Free$', 'NN'),
16381642

@@ -1687,6 +1691,7 @@ def build_detection_from_node(
16871691
(r'^Compression$', 'NN'),
16881692
(r'^Letter$', 'NN'),
16891693
(r'^Moved$', 'NN'),
1694+
(r'^Phone$', 'NN'),
16901695

16911696
# dual caps that are not NNP
16921697
(r'^Make[A-Z]', 'JUNK'),
@@ -1790,6 +1795,11 @@ def build_detection_from_node(
17901795
(r'various\.?$', 'NNP'),
17911796
(r'SuSE$', 'COMPANY'),
17921797
(r'Suse$', 'COMPANY'),
1798+
(r'\(Winbond\),?$', 'COMP'),
1799+
1800+
# copyright : (C) 2002 by karsten wiese
1801+
(r'karsten$', 'NNP'),
1802+
(r'wiese$', 'NNP'),
17931803

17941804
# treat Attributable as proper noun as it is seen in Author tags such as in:
17951805
# @author not attributable
@@ -1823,8 +1833,10 @@ def build_detection_from_node(
18231833
(r'^.+,Inc\.$', 'COMPANY'),
18241834

18251835
(r'^[Cc]ompany[,\.]?\)?$', 'COMP'),
1826-
(r'^Limited[,\.]??$', 'COMP'),
1827-
(r'^LIMITED[,\.]??$', 'COMP'),
1836+
(r'^Limited[,\.]?$', 'COMP'),
1837+
(r'^LIMITED[,\.]?$', 'COMP'),
1838+
1839+
(r'^COMPANY,LTD$', 'COMP'),
18281840

18291841
# Caps company suffixes
18301842
(r'^INC[\.,\)]*$', 'COMP'),
@@ -1874,6 +1886,9 @@ def build_detection_from_node(
18741886
# Iceland
18751887
(r'^(ehf|hf|svf|ohf)\.,?$', 'COMP'),
18761888

1889+
# company abbreviations
1890+
(r'^(SPRL|srl)[\.,]?$', 'COMP'),
1891+
18771892
# company suffix : AS: this is frequent beyond Norway.
18781893
(r'^AS', 'CAPS'),
18791894
# that's the ASF, not some legal form
@@ -2412,6 +2427,9 @@ def build_detection_from_node(
24122427
# Project contributors
24132428
COMPANY: {<COMP> <CONTRIBUTORS>} #256
24142429
2430+
# Copyright (C) 2013 Ideas on board SPRL
2431+
COMPANY: {<NNP> <JUNK> <NN> <COMP>} #259
2432+
24152433
COMPANY: {<LINUX>? <COMP>+} #260
24162434
24172435
# Nokia Corporation and/or its subsidiary(-ies)
@@ -2437,12 +2455,22 @@ def build_detection_from_node(
24372455
# NAME-YEAR starts or ends with a YEAR range
24382456
NAME-YEAR: {<YR-RANGE> <NNP> <NNP>+} #350
24392457
2458+
COPYRIGHT: {<COPY> <YR-RANGE> <NNP> <NN> <NNP> <NNP> <NNP> <EMAIL>} #350.1
2459+
2460+
# Copyright (C) 1995-06 ICP vortex, Achim Leubner
2461+
COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <CAPS> <NN> <NNP> <NNP> } #350.2
2462+
24402463
# Academy of Motion Picture Arts
24412464
NAME: {<NNP|PN>+ <NNP>+} #351
24422465
24432466
# Distributed Management Task Force
24442467
NAME: {<NN> <NNP>{3}} #881111
24452468
2469+
# Rudolf Marek <r.marek@assembler.cz>
2470+
# David Hubbard <david.c.hubbard@gmail.com>
2471+
# Daniel J Blueman <daniel.blueman@gmail.com>
2472+
# NAME: { <NAME> <EMAIL> } #351.0
2473+
24462474
# @author <a href="mailto:stephane@hillion.org">Stephane Hillion</a>
24472475
NAME: { <NN>? <NN>? <EMAIL> <NAME> } #351.1
24482476
@@ -3000,7 +3028,7 @@ def build_detection_from_node(
30003028
AUTHOR: {<BY|MAINT> <NAME-EMAIL> <YR-RANGE>?} #26382
30013029
30023030
# Russ Dill <Russ.Dill@asu.edu> 2001-2003
3003-
COPYRIGHT: {<NAME-EMAIL> <YR-RANGE>} #2638
3031+
# COPYRIGHT: {<NAME-EMAIL> <YR-RANGE>} #2638
30043032
30053033
# (C) 2001-2009, <s>Takuo KITAME, Bart Martens, and Canonical, LTD</s>
30063034
COPYRIGHT: {<COPYRIGHT> <NNP> <COMPANY>} #26381

src/cluecode/copyrights_hint.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@
7171
'</s>',
7272
'<s/>',
7373
'by ', # note the trailing space
74+
# common for emails
75+
'@',
7476
)
7577

7678
'''

src/licensedcode/data/licenses/cmigemo.LICENSE

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ other_urls:
1212
- http://fedoraproject.org/wiki/Licensing/CMigemo
1313
ignorable_authors:
1414
- MURAOKA Taro
15+
- Translator Mamoru Tasaka <mtasaka@ioa.s.u-tokyo.ac.jp>
1516
ignorable_emails:
1617
- koron@tka.att.ne.jp
1718
- mtasaka@ioa.s.u-tokyo.ac.jp
@@ -84,4 +85,4 @@ below.
8485

8586
THE MAIN RULE ENDS HERE
8687
If you cannot agree with the conditions above, please
87-
stop using this software.
88+
stop using this software.

src/licensedcode/data/licenses/first-works-appreciative-1.2.LICENSE

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ category: Proprietary Free
66
owner: Jonathan Michael Davis
77
spdx_license_key: LicenseRef-scancode-first-works-appreciative-1.2
88
ignorable_copyrights:
9-
- Copyright (c) 2005 Jonathan Michael Davis <jond_123@hotmail.com>
9+
- Copyright (c) 2005 Jonathan Michael Davis <jond_123@hotmail.com> <jon@jondavis.net>
1010
ignorable_holders:
1111
- Jonathan Michael Davis
1212
ignorable_emails:
@@ -137,4 +137,4 @@ License.
137137
whether in contract, strict liability, or tort (including negligence or
138138
otherwise) arising in any way out of the use or distribution of the Original
139139
Work or the exercise of any rights granted hereunder, even if advised of
140-
the possibility of such damages.
140+
the possibility of such damages.

src/licensedcode/data/licenses/nvidia-cuda-supplement-2020.LICENSE

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ ignorable_holders:
4545
- Yann Collet
4646
- Zoltan Herczeg
4747
ignorable_authors:
48-
- Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa) David Keyes (david.keyes@kaust.edu.sa)
48+
- Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa) David Keyes (david.keyes@kaust.edu.sa) Hatem
49+
Ltaief (hatem.ltaief@kaust.edu.sa)
4950
- Ahmad M. Abdelfattah, David Keyes, and Hatem Ltaief
5051
- D. E. Shaw Research
5152
- Davide Barbieri

src/licensedcode/data/licenses/pcre.LICENSE

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ ignorable_holders:
1616
- the University of Cambridge, England
1717
ignorable_authors:
1818
- Philip Hazel
19+
- Philip Hazel <ph10@cam.ac.uk> University of Cambridge Computing Service, Cambridge, England
20+
1921
ignorable_urls:
2022
- ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/
2123
ignorable_emails:
@@ -68,4 +70,4 @@ which it is incompatible.
6870
The documentation for PCRE, supplied in the "doc" directory, is
6971
distributed under the same terms as the software itself.
7072

71-
End PCRE LICENCE
73+
End PCRE LICENCE

src/licensedcode/data/licenses/pygres-2.2.LICENSE

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ owner: Unspecified
77
spdx_license_key: LicenseRef-scancode-pygres-2.2
88
text_urls:
99
- http://shell.vex.net/viewvc.cgi/pygresql/trunk/module/pgmodule.c?view=markup&pathrev=431
10+
ignorable_authors:
11+
- Pascal Andre, andre@chimay.via.ecp.fr
1012
ignorable_copyrights:
1113
- Copyright (c) 1995, Pascal Andre (andre@via.ecp.fr)
1214
- copyright 1997, 1998, 1999 by D'Arcy J.M. Cain (darcy@druid.net)
@@ -42,4 +44,4 @@ AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES,
4244
ENHANCEMENTS, OR MODIFICATIONS.
4345

4446
Further modifications copyright 1997, 1998, 1999 by D'Arcy J.M. Cain
45-
(darcy@druid.net) subject to the same terms and conditions as above.
47+
(darcy@druid.net) subject to the same terms and conditions as above.

src/licensedcode/data/rules/pcre_13.RULE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ ignorable_holders:
99
- the University of Cambridge, England
1010
ignorable_authors:
1111
- Philip Hazel
12+
- Philip Hazel <ph10@cam.ac.uk> University of Cambridge Computing Service, Cambridge, England
1213
ignorable_urls:
1314
- ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/
1415
ignorable_emails:

tests/cluecode/data/copyright_fossology/testdata134.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ holders:
99
- Free Software Foundation, Inc.
1010
authors:
1111
- Ian Jackson, Erick Branderhorst, Galen Hazelwood, and Josip Rodin. Today
12-
- the Debian
12+
- the Debian TeX Task Force <debian-tex-maint@lists.debian.org>
13+
- Karl Berry <karl@cs.umb.edu>

tests/cluecode/data/copyright_fossology/testdata136.yml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,3 @@ copyrights:
88
holders:
99
- Free Software Foundation
1010
- Eazel, Inc
11-
authors:
12-
- maintainers Rodney Dawes <dobey@novell.com>

0 commit comments

Comments
 (0)