@@ -806,6 +806,12 @@ def build_detection_from_node(
806
806
# verbatim star
807
807
(r'^\*$' , 'JUNK' ),
808
808
809
+ # misc company names: exceptions to the next rule
810
+ (r'^TinCanTools$' , 'NNP' ),
811
+ (r'^SoftwareBitMaker$' , 'NNP' ),
812
+ (r'^NetCommWireless$' , 'NNP' ),
813
+
814
+ # Repeated CamelCasedWords
809
815
(r'^([A-Z][a-z]+){3,}$' , 'JUNK' ),
810
816
811
817
############################################################################
@@ -1079,7 +1085,7 @@ def build_detection_from_node(
1079
1085
(r'^whom$' , 'JUNK' ),
1080
1086
(r'^However,?$' , 'JUNK' ),
1081
1087
(r'^[Cc]ollectively$' , 'JUNK' ),
1082
- (r'^following$' , 'JUNK ' ),
1088
+ (r'^following$' , 'FOLLOWING ' ),
1083
1089
(r'^[Cc]onfig$' , 'JUNK' ),
1084
1090
(r'^file\.$' , 'JUNK' ),
1085
1091
@@ -1184,7 +1190,7 @@ def build_detection_from_node(
1184
1190
(r'^[a-z]{3,10}[A-Z][a-z]{3,10}$' , 'JUNK' ),
1185
1191
1186
1192
(r'^\$?Guid$' , 'JUNK' ),
1187
- (r'^Small$' , 'NN' ),
1193
+ # (r'^Small$', 'NN'),
1188
1194
(r'^implementing$' , 'JUNK' ),
1189
1195
(r'^Unlike$' , 'JUNK' ),
1190
1196
(r'^using$' , 'JUNK' ),
@@ -1206,6 +1212,11 @@ def build_detection_from_node(
1206
1212
# single period
1207
1213
(r"^\.$" , 'JUNK' ),
1208
1214
1215
+ # exception to the next rule
1216
+
1217
+ # by PaX Team
1218
+ (r"PaX$" , 'NN' ),
1219
+
1209
1220
# short mixed caps with trailing cap: ZoY
1210
1221
(r"[A-Z][a-z][A-Z]$" , 'JUNK' ),
1211
1222
@@ -1405,6 +1416,7 @@ def build_detection_from_node(
1405
1416
(r'^STA$' , 'NN' ),
1406
1417
(r'^Page$' , 'NN' ),
1407
1418
(r'^Todo/Under$' , 'JUNK' ),
1419
+ (r'^Under$' , 'NN' ),
1408
1420
1409
1421
(r'^Interrupt$' , 'NN' ),
1410
1422
(r'^cleanups?$' , 'JUNK' ),
@@ -1668,6 +1680,8 @@ def build_detection_from_node(
1668
1680
(r'^([Mm]onday|[Tt]uesday|[Ww]ednesday|[Tt]hursday|[Ff]riday|[Ss]aturday|[Ss]unday),?$' , 'DAY' ),
1669
1681
(r'^(Mon|Tue|Wed|Thu|Fri|Sat|Sun|May),?$' , 'NN' ),
1670
1682
1683
+ (r'^[Dd]ebugging$' , 'JUNK' ),
1684
+
1671
1685
# misc words that are not NNs
1672
1686
# lowercase verbs ending in "ing"
1673
1687
(r'^[a-z]+ing$' , 'NN' ),
@@ -1700,6 +1714,9 @@ def build_detection_from_node(
1700
1714
(r'^Moved$' , 'NN' ),
1701
1715
(r'^Phone$' , 'NN' ),
1702
1716
1717
+ (r'^Inputs?$' , 'NN' ),
1718
+
1719
+
1703
1720
# dual caps that are not NNP
1704
1721
(r'^Make[A-Z]' , 'JUNK' ),
1705
1722
(r'^Create[A-Z]' , 'JUNK' ),
@@ -2069,6 +2086,7 @@ def build_detection_from_node(
2069
2086
# and Spanish/French Da Siva and De Gaulle
2070
2087
(r'^(([Vv][ao]n)|[Dd][aeu])$' , 'VAN' ),
2071
2088
2089
+ (r'^aan$' , 'OF' ),
2072
2090
(r'^van$' , 'VAN' ),
2073
2091
(r'^Van$' , 'VAN' ),
2074
2092
(r'^von$' , 'VAN' ),
@@ -2134,7 +2152,10 @@ def build_detection_from_node(
2134
2152
(r'^\$?date-of-software$' , 'YR' ),
2135
2153
(r'^\$?date-of-document$' , 'YR' ),
2136
2154
2137
- # cardinal numbers
2155
+ # small cardinal numbers: one or two digits, 0 to 39, with optional trailing dot or comma
2156
+ (r'^[0-3]?[0-9]?[\.,]?$' , 'CDS' ),
2157
+
2158
+ # all other cardinal numbers
2138
2159
(r'^-?[0-9]+(.[0-9]+)?[\.,]?$' , 'CD' ),
2139
2160
2140
2161
############################################################################
@@ -2179,6 +2200,7 @@ def build_detection_from_node(
2179
2200
2180
2201
# exceptions to CAPS used in obfuscated emails like in joe AT foo DOT com
2181
2202
(r'^AT$' , 'AT' ),
2203
+ (r'^AT$' , '<at>' ),
2182
2204
(r'^DOT$' , 'DOT' ),
2183
2205
2184
2206
# all CAPS word, at least 1 char long such as MIT, including an optional trailing comma or dot
@@ -2288,6 +2310,9 @@ def build_detection_from_node(
2288
2310
# some punctuation combos
2289
2311
(r'^(?:=>|->|<-|<=)$' , 'JUNK' ),
2290
2312
2313
+ (r'^semiconductors?[\.,]?$' , 'NNP' ),
2314
+
2315
+
2291
2316
############################################################################
2292
2317
# catch all other as Nouns
2293
2318
############################################################################
@@ -2308,17 +2333,21 @@ def build_detection_from_node(
2308
2333
2309
2334
YR-RANGE: {<YR>+ <CC>+ <YR>} #20
2310
2335
YR-RANGE: {<YR> <DASH|TO>* <YR|BARE-YR>+} #30
2311
- YR-RANGE: {<CD|BARE-YR>? <YR> <BARE-YR>?} #40
2336
+ YR-RANGE: {<CD|CDS| BARE-YR>? <YR> <BARE-YR>?} #40
2312
2337
YR-RANGE: {<YR>+ <BARE-YR>? } #50
2313
2338
YR-AND: {<CC>? <YR>+ <CC>+ <YR>} #60
2314
2339
YR-RANGE: {<YR-AND>+} #70
2315
2340
YR-RANGE: {<YR-RANGE>+ <DASH|TO> <YR-RANGE>+} #71
2316
2341
YR-RANGE: {<YR-RANGE>+ <DASH>?} #72
2317
2342
# Copyright (c) 1999, 2000, 01, 03, 06 Ralf Baechle
2318
- YR-RANGE: {<YR-RANGE> <CD>+} #72.2
2343
+ YR-RANGE: {<YR-RANGE> <CD|CDS >+} #72.2
2319
2344
2320
2345
CD: {<BARE-YR>} #bareyear
2321
2346
2347
+ # 5 Jan 2003
2348
+ YR-RANGE: {<CDS> <NNP> <YR-RANGE>} #72.3
2349
+
2350
+
2322
2351
#######################################
2323
2352
# All/No/Some Rights Reserved
2324
2353
#######################################
@@ -2343,6 +2372,9 @@ def build_detection_from_node(
2343
2372
# foo@bar.com or baz@bar.com
2344
2373
EMAIL: {<EMAIL> <NN> <EMAIL>} # email or email
2345
2374
2375
+ # <srinivasa.deevi at conexant dot com>
2376
+ EMAIL: {<EMAIL_START> <CC> <NN> <DOT> <NN> } #email with brackets
2377
+
2346
2378
#######################################
2347
2379
# NAMES and COMPANIES
2348
2380
#######################################
@@ -2408,8 +2440,9 @@ def build_detection_from_node(
2408
2440
# AT&T Laboratories, Cambridge
2409
2441
COMPANY: {<COMP> <COMP> <NNP>} #145
2410
2442
2443
+ COMPANY: {<COMP> <CD|CDS> <COMP>} #170
2444
+
2411
2445
# rare "Software in the public interest, Inc."
2412
- COMPANY: {<COMP> <CD> <COMP>} #170
2413
2446
COMPANY: {<NNP> <IN><NN> <NNP> <NNP>+<COMP>?} #180
2414
2447
2415
2448
# Commonwealth Scientific and Industrial Research Organisation (CSIRO)
@@ -2558,18 +2591,21 @@ def build_detection_from_node(
2558
2591
NAME: {<NAME|NAME-EMAIL>+ <OF> <NNP> <OF> <NN>? <COMPANY>} #550
2559
2592
NAME: {<NAME|NAME-EMAIL>+ <CC|OF>? <NAME|NAME-EMAIL|COMPANY>} #560
2560
2593
2561
- NAME: {<NNP><NNP>} #5611
2594
+ NAME: {<NNP><NNP>} #561
2562
2595
2563
2596
# strip Software from Copyright (c) Ian Darwin 1995. Software
2564
- NAME-YEAR: {<NAME>+ <YR-RANGE>} #5611
2597
+ NAME-YEAR: {<NAME>+ <YR-RANGE>} #561.1
2565
2598
2566
2599
# Copyright 2018, OpenCensus Authors
2567
- COPYRIGHT: {<COPY>+ <YR-RANGE> <NNP> <AUTHS>} #1579991
2600
+ COPYRIGHT: {<COPY>+ <YR-RANGE> <NNP> <AUTHS>} #561.2
2601
+
2602
+ # Tom aan de Wiel
2603
+ NAME: {<NNP> <OF> <VAN> <NNP> } # 561.3
2568
2604
2569
- NAME-YEAR: {<YR-RANGE> <NNP>+ <CAPS>? <LINUX>?} #5612
2605
+ NAME-YEAR: {<YR-RANGE> <NNP>+ <CAPS>? <LINUX>?} #562
2570
2606
2571
2607
#Academy of Motion Picture Arts and Sciences
2572
- NAME: {<NAME> <CC> <NNP>} #561
2608
+ NAME: {<NAME> <CC> <NNP>} #563
2573
2609
2574
2610
# Adam Weinberger and the GNOME Foundation
2575
2611
ANDCO: {<CC> <NN> <COMPANY>} #565
@@ -2581,6 +2617,8 @@ def build_detection_from_node(
2581
2617
2582
2618
URL: {<PARENS> <URL> <PARENS>} #5700
2583
2619
2620
+ NAME-YEAR: {<NAME-YEAR> <CDS> <NNP>} #5700.1
2621
+
2584
2622
#also accept trailing email and URLs
2585
2623
# and "VAN" e.g. Du: Copyright (c) 2008 Alek Du <alek.du@intel.com>
2586
2624
NAME-YEAR: {<NAME-YEAR> <VAN>? <EMAIL>?<URL>?} #5701
@@ -2591,7 +2629,7 @@ def build_detection_from_node(
2591
2629
NAME: {<NN|NNP|CAPS>+ <CC> <OTH>} #600
2592
2630
NAME: {<NNP> <CAPS>} #610
2593
2631
NAME: {<CAPS> <DASH>? <NNP|NAME>} #620
2594
- NAME: {<NNP> <CD> <NNP>} #630
2632
+ NAME: {<NNP> <CD|CDS > <NNP>} #630
2595
2633
NAME: {<COMP> <NAME>+} #640
2596
2634
2597
2635
# Copyright 2018-2019 @paritytech/substrate-light-ui authors & contributors
@@ -2983,7 +3021,11 @@ def build_detection_from_node(
2983
3021
2984
3022
# Russ Dill <Russ.Dill@asu.edu> 2001-2003
2985
3023
# Rewrited by Vladimir Oleynik <dzo@simtreas.ru> (C) 2003
2986
- COPYRIGHT: {<NAME-EMAIL> <YR-RANGE> <AUTH2> <BY> <NAME-EMAIL> <COPY> <YR-RANGE>} #22793.5
3024
+ COPYRIGHT: {<NAME-EMAIL> <YR-RANGE> <AUTH2> <BY> <NAME-EMAIL> <COPY> <YR-RANGE>} #2280-2
3025
+
3026
+ # Copyright (C) 2018
3027
+ # Author: Jeff LaBundy <jeff@labundy.com>
3028
+ COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <AUTH> <NAME-EMAIL>} #2280-3
2987
3029
2988
3030
COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <PN>*} #2280
2989
3031
@@ -3106,7 +3148,7 @@ def build_detection_from_node(
3106
3148
COPYRIGHT: {<COPYRIGHT2> <CAPS|COMPANY> <NN|LINUX> <COMPANY>} #2008
3107
3149
3108
3150
# Copyright (c) 2016-2018 JSR 371 expert group and contributors
3109
- COPYRIGHT: {<COPYRIGHT2> <CAPS> <CD> <COMPANY> <NAME>} #2009.1
3151
+ COPYRIGHT: {<COPYRIGHT2> <CAPS> <CD|CDS > <COMPANY> <NAME>} #2009.1
3110
3152
3111
3153
# COPYRIGHT (c) 2006 - 2009 DIONYSOS
3112
3154
COPYRIGHT: {<COPYRIGHT2> <CAPS>} #2009
@@ -3235,7 +3277,7 @@ def build_detection_from_node(
3235
3277
COPYRIGHT: {<COPY> <NNP> <NAME-YEAR> <COMPANY>?} #15720
3236
3278
3237
3279
# Copyright (c) 2008-1010 Intel Corporation
3238
- COPYRIGHT: {<COPY> <COPY> <CD> <COMPANY>} #rare-cd-not-year
3280
+ COPYRIGHT: {<COPY> <COPY> <CD|CDS > <COMPANY>} #rare-cd-not-year
3239
3281
3240
3282
# Copyright (C) 2005-2006 dann frazier <dannf@dannf.org>
3241
3283
COPYRIGHT: {<COPYRIGHT2> <NN> <NN> <EMAIL>} #999991
@@ -3258,6 +3300,9 @@ def build_detection_from_node(
3258
3300
# copyrighted by the Open Source Vulnerability Database (http://osvdb.org)
3259
3301
COPYRIGHT: {<COPY> <BY> <NN|NNP>{3} <NAME>} #83002.1
3260
3302
3303
+ # (C) by the respective authors,
3304
+ <COPYRIGHT>: { <COPY> <BY> <NN> <NN> <AUTHDOT>} #83002.2
3305
+
3261
3306
# weird //opylefted by <-Harvie 2oo7
3262
3307
COPYRIGHT: {<COPY> <BY> <NN> <NN> <MAINT>?} #83003
3263
3308
@@ -3301,6 +3346,14 @@ def build_detection_from_node(
3301
3346
# Gracenote Software, copyright © 2000-2008 Gracenote.
3302
3347
COPYRIGHT: {<COMPANY> <COPY>{1,2} <NAME-YEAR>} #157999.12
3303
3348
3349
+ #Copyright (C) 2012-2016 by the following authors:
3350
+ #- Wladimir J. van der Laan <laanwj@gmail.com>
3351
+
3352
+ NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
3353
+ NAME-EMAIL: {<DASH> <NAME-EMAIL> <NN>?} #157999.14
3354
+ COPYRIGHT: {<COPYRIGHT2> <FOLLOWING> <AUTHS> <NAME-EMAIL>+ } #157999.14
3355
+
3356
+
3304
3357
#######################################
3305
3358
# Copyright is held by ....
3306
3359
#######################################
@@ -3412,11 +3465,11 @@ def build_detection_from_node(
3412
3465
3413
3466
COPYRIGHT: {<COMPANY><COPY>+<ALLRIGHTRESERVED>} #99900
3414
3467
3415
- COPYRIGHT: {<COPYRIGHT|COPYRIGHT2|COPY|NAME-COPY> <COPY|NNP|AUTHDOT|CAPS|CD|YR-RANGE|NAME|NAME-EMAIL|NAME-YEAR|NAME-COPY|NAME-CAPS|AUTHORANDCO|COMPANY|YEAR|PN|COMP|UNI|CC|OF|IN|BY|OTH|VAN|URL|EMAIL|URL2|MIXEDCAP|NN>+ <ALLRIGHTRESERVED>} #99999
3468
+ COPYRIGHT: {<COPYRIGHT|COPYRIGHT2|COPY|NAME-COPY> <COPY|NNP|AUTHDOT|CAPS|CD|CDS| YR-RANGE|NAME|NAME-EMAIL|NAME-YEAR|NAME-COPY|NAME-CAPS|AUTHORANDCO|COMPANY|YEAR|PN|COMP|UNI|CC|OF|IN|BY|OTH|VAN|URL|EMAIL|URL2|MIXEDCAP|NN>+ <ALLRIGHTRESERVED>} #99999
3416
3469
3417
3470
# * Copyright (C) 2004 Red Hat, Inc.
3418
3471
# * Copyright (C) 200 Matthias Clasen <mclasen@redhat.com>
3419
- COPYRIGHT: {<COPY> <COPY> <CD> <NAME-EMAIL>} #9999970
3472
+ COPYRIGHT: {<COPY> <COPY> <CD|CDS > <NAME-EMAIL>} #9999970
3420
3473
3421
3474
# <p class="copyright"><a href="http://www.w3.org/Consortium/Legal/ipr-notice-20000612#Copyright">Copyright</a>
3422
3475
COPYRIGHT: {<COPYRIGHT> <COPY>} #9999980
@@ -3803,6 +3856,8 @@ def is_junk_copyright(s, patterns=COPYRIGHTS_JUNK_PATTERN_MATCHERS):
3803
3856
'$' ,
3804
3857
'current.year' ,
3805
3858
"©" ,
3859
+ 'author' ,
3860
+ 'authors' ,
3806
3861
])
3807
3862
))
3808
3863
0 commit comments