@@ -2237,7 +2237,8 @@ def build_detection_from_node(
2237
2237
# BY GEORGE J. CARRETTE
2238
2238
NAME: {<BY> <CAPS> <PN> <CAPS>} #85
2239
2239
2240
- DASHCAPS: {<DASH> <CAPS>}
2240
+ DASHCAPS: {<DASH> <CAPS>} #899999
2241
+
2241
2242
# INRIA - CIRAD - INRA
2242
2243
COMPANY: {<COMP> <DASHCAPS>+} #1280
2243
2244
@@ -2266,7 +2267,7 @@ def build_detection_from_node(
2266
2267
COMPANY: {<NNP> <IN><NN> <NNP> <NNP>+<COMP>?} #180
2267
2268
2268
2269
# Commonwealth Scientific and Industrial Research Organisation (CSIRO)
2269
- COMPANY: {<NNP> <NNP> <CC> <NNP> <COMP> <NNP> <CAPS>}
2270
+ COMPANY: {<NNP> <NNP> <CC> <NNP> <COMP> <NNP> <CAPS>} #190
2270
2271
2271
2272
COMPANY: {<NNP> <CC> <NNP> <COMP> <NNP>*} #200
2272
2273
@@ -2334,6 +2335,9 @@ def build_detection_from_node(
2334
2335
# Academy of Motion Picture Arts
2335
2336
NAME: {<NNP|PN>+ <NNP>+} #351
2336
2337
2338
+ # Distributed Management Task Force
2339
+ # NAME: {<NN> <NNP>{3}} #881111
2340
+
2337
2341
# @author <a href="mailto:stephane@hillion.org">Stephane Hillion</a>
2338
2342
NAME: { <NN>? <NN>? <EMAIL> <NAME> } #351.1
2339
2343
@@ -2452,7 +2456,7 @@ def build_detection_from_node(
2452
2456
COMPANY: {<COMP|COMPANY|NNP> <NN> <COMPANY|COMPANY> <NNP>+} #800
2453
2457
2454
2458
# by the Institute of Electrical and Electronics Engineers, Inc.
2455
- COMPANY: {<BY> <NN> <COMPANY> <OF> <NNP> <CC> <COMPANY>}
2459
+ COMPANY: {<BY> <NN> <COMPANY> <OF> <NNP> <CC> <COMPANY>} #805
2456
2460
COMPANY: {<COMPANY> <CC> <AUTH|CONTRIBUTORS|AUTHS>} #810
2457
2461
2458
2462
# A community of developers
@@ -2461,9 +2465,12 @@ def build_detection_from_node(
2461
2465
# Copyright (c) 2002-2010 The ANGLE Project Authors
2462
2466
COMPANY: {<NN> <COMP|COMPANY>+ <AUTHS>?} #820
2463
2467
2468
+ ANDCO: {<CC> <NNP>? <NN> <URL>} #825
2469
+
2464
2470
# this is catching a wide net by treating any bare URL as a company
2465
2471
COMPANY: {<NNP>? <URL|URL2>} #830
2466
2472
2473
+
2467
2474
COMPANY: {<COMPANY> <COMP|COMPANY>} #840
2468
2475
2469
2476
# the Software and Component Technologies group of Trimble Navigation, Ltd.
@@ -2543,10 +2550,10 @@ def build_detection_from_node(
2543
2550
COMPANY: {<BY> <NN>+ <COMP|COMPANY>} #1420
2544
2551
2545
2552
# the Regents of the University of California, Sun Microsystems, Inc., Scriptics Corporation
2546
- COMPANY: {<NN> <NNP> <OF> <NN> <UNI> <OF> <COMPANY>+}
2553
+ COMPANY: {<NN> <NNP> <OF> <NN> <UNI> <OF> <COMPANY>+} #1422
2547
2554
2548
- # Copyright (c) 1998-2000 University College London
2549
- COMPANY: {<UNI> <UNI> <NNP>}
2555
+ # Copyright (c) 1998-2000 University College London
2556
+ COMPANY: {<UNI> <UNI> <NNP>} #1427
2550
2557
2551
2558
# "And" some name
2552
2559
ANDCO: {<CC>+ <NN> <NNP>+<UNI|COMP>?} #1430
@@ -2589,8 +2596,8 @@ def build_detection_from_node(
2589
2596
# Copyright 2015 The Error Prone Authors.
2590
2597
NAME: {<NN> <NAME> <CONTRIBUTORS|AUTHS>} #196023
2591
2598
2592
- # Copyright (C) <s>Suresh P <suresh@ippimail.com></s> #19601
2593
- NAME: {<NNP> <PN> <EMAIL>}
2599
+ # Copyright (C) <s>Suresh P <suresh@ippimail.com></s>
2600
+ NAME: {<NNP> <PN> <EMAIL>} #19601.1
2594
2601
2595
2602
# Copyright or Copr. Mines Paristech, France - Mark NOBLE, Alexandrine GESRET
2596
2603
NAME: {<NAME> <DASH> <NAME> <CAPS>} #19601
@@ -2739,13 +2746,13 @@ def build_detection_from_node(
2739
2746
COPYRIGHT: {<NAME-COPY> <NNP>} #2274
2740
2747
2741
2748
# Copyright 1994-2007 (c) RealNetworks, Inc.
2742
- COPYRIGHT: {<COPY>+ <YR-RANGE> <COPYRIGHT>} #2274
2749
+ COPYRIGHT: {<COPY>+ <YR-RANGE> <COPYRIGHT>} #2275
2743
2750
2744
2751
# Copyright (c) 2017 Contributors et.al.
2745
2752
COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <CONTRIBUTORS> <OTH> } #2276
2746
2753
2747
2754
#Copyright (c) 2020 Contributors as noted in the AUTHORS file
2748
- COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <CONTRIBUTORS> <NN>* <IN>? <NN>* <CAPS|AUTHS|ATH> <JUNK> }
2755
+ COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <CONTRIBUTORS> <NN>* <IN>? <NN>* <CAPS|AUTHS|ATH> <JUNK> } #2277.1
2749
2756
2750
2757
# copyrighted by Object Computing, Inc., St. Louis Missouri, Copyright (C) 2002, all rights reserved.
2751
2758
COPYRIGHT: {<COPYRIGHT> <COPY>+ <YR-RANGE> <ALLRIGHTRESERVED>} #2278
@@ -2922,9 +2929,9 @@ def build_detection_from_node(
2922
2929
COPYRIGHT2: {<COPYRIGHT2> <JUNK> <COMPANY>} # 2010
2923
2930
2924
2931
# copyright C 1988 by the Institute of Electrical and Electronics Engineers, Inc.
2925
- COPYRIGHT: {<COPY> <PN> <YR-RANGE> <COMPANY>}
2932
+ COPYRIGHT: {<COPY> <PN> <YR-RANGE> <COMPANY>} #2274.1
2926
2933
2927
- COPYRIGHT2: {<NAME-COPY> <COPYRIGHT2>} #2274
2934
+ COPYRIGHT2: {<NAME-COPY> <COPYRIGHT2>} #2274.2
2928
2935
2929
2936
# (C) COPYRIGHT 2004 UNIVERSITY OF CHICAGO
2930
2937
COPYRIGHT: {<COPYRIGHT2> <UNI> <OF> <CAPS>} #2276
@@ -3069,7 +3076,7 @@ def build_detection_from_node(
3069
3076
COPYRIGHT: {<COPY> <NN>?<NNP>+ <AUTHS>} #83004
3070
3077
3071
3078
# (C) Distributed Management Task Force (Distributed is an NN)
3072
- COPYRIGHT: {<COPY> <NN> <NAME>} #83010
3079
+ # COPYRIGHT: {<COPY> <NN> <NAME>} #83010
3073
3080
3074
3081
# Copyright (c) 2014 The Rust Project Developers
3075
3082
COPYRIGHT: {<COPYRIGHT> <MAINT> } #83020
@@ -4030,7 +4037,16 @@ def candidate_lines(numbered_lines):
4030
4037
4031
4038
previous_chars = chars_only
4032
4039
if TRACE :
4033
- logger_debug (' candidate_lines: line is <s></s>candidate' )
4040
+ logger_debug (' candidate_lines: line is <s></s> candidate' )
4041
+
4042
+ elif 'http' in line :
4043
+ # this handles copyright statements that list many URLs
4044
+ in_copyright = 2
4045
+ candidates_append (numbered_line )
4046
+
4047
+ previous_chars = chars_only
4048
+ if TRACE :
4049
+ logger_debug (' candidate_lines: line is HTTP candidate' )
4034
4050
4035
4051
elif in_copyright > 0 :
4036
4052
# these are a sign that the copyrights continue after
@@ -4045,6 +4061,7 @@ def candidate_lines(numbered_lines):
4045
4061
'copyrights' ,
4046
4062
'and' ,
4047
4063
'by' ,
4064
+ ',' ,
4048
4065
))
4049
4066
)
4050
4067
and not has_trailing_year (previous_chars )
@@ -4177,8 +4194,12 @@ def prepare_text_line(line, dedeb=True, to_ascii=True):
4177
4194
.replace ('( C)' , ' (c) ' )
4178
4195
.replace ('(C)' , ' (c) ' )
4179
4196
.replace ('(c)' , ' (c) ' )
4180
- # the case of \251 is tested by 'weirdencoding.h'
4197
+ .replace ('( © )' , ' (c) ' )
4198
+ .replace ('(©)' , ' (c) ' )
4199
+ .replace ('(© )' , ' (c) ' )
4200
+ .replace ('( ©)' , ' (c) ' )
4181
4201
.replace ('©' , ' (c) ' )
4202
+ # the case of \251 is tested by 'weirdencoding.h'
4182
4203
.replace ('\251 ' , ' (c) ' )
4183
4204
.replace ('©' , ' (c) ' )
4184
4205
.replace ('©' , ' (c) ' )
0 commit comments