@@ -806,6 +806,12 @@ def build_detection_from_node(
806
806
# verbatim star
807
807
(r'^\*$' , 'JUNK' ),
808
808
809
+ # misc company names exception to next rule
810
+ (r'^TinCanTools$' , 'NNP' ),
811
+ (r'^SoftwareBitMaker$' , 'NNP' ),
812
+ (r'^NetCommWireless$' , 'NNP' ),
813
+
814
+ # Repeated CamelCasedWords
809
815
(r'^([A-Z][a-z]+){3,}$' , 'JUNK' ),
810
816
811
817
############################################################################
@@ -1079,7 +1085,7 @@ def build_detection_from_node(
1079
1085
(r'^whom$' , 'JUNK' ),
1080
1086
(r'^However,?$' , 'JUNK' ),
1081
1087
(r'^[Cc]ollectively$' , 'JUNK' ),
1082
- (r'^following$' , 'JUNK ' ),
1088
+ (r'^following$' , 'FOLLOWING ' ),
1083
1089
(r'^[Cc]onfig$' , 'JUNK' ),
1084
1090
(r'^file\.$' , 'JUNK' ),
1085
1091
@@ -1184,7 +1190,7 @@ def build_detection_from_node(
1184
1190
(r'^[a-z]{3,10}[A-Z][a-z]{3,10}$' , 'JUNK' ),
1185
1191
1186
1192
(r'^\$?Guid$' , 'JUNK' ),
1187
- (r'^Small$' , 'NN' ),
1193
+ # (r'^Small$', 'NN'),
1188
1194
(r'^implementing$' , 'JUNK' ),
1189
1195
(r'^Unlike$' , 'JUNK' ),
1190
1196
(r'^using$' , 'JUNK' ),
@@ -1206,6 +1212,11 @@ def build_detection_from_node(
1206
1212
# single period
1207
1213
(r"^\.$" , 'JUNK' ),
1208
1214
1215
+ # exception to the next rule
1216
+
1217
+ # by PaX Team
1218
+ (r"PaX$" , 'NN' ),
1219
+
1209
1220
# short mixed caps with trailing cap: ZoY
1210
1221
(r"[A-Z][a-z][A-Z]$" , 'JUNK' ),
1211
1222
@@ -1405,6 +1416,7 @@ def build_detection_from_node(
1405
1416
(r'^STA$' , 'NN' ),
1406
1417
(r'^Page$' , 'NN' ),
1407
1418
(r'^Todo/Under$' , 'JUNK' ),
1419
+ (r'^Under$' , 'NN' ),
1408
1420
1409
1421
(r'^Interrupt$' , 'NN' ),
1410
1422
(r'^cleanups?$' , 'JUNK' ),
@@ -1668,6 +1680,8 @@ def build_detection_from_node(
1668
1680
(r'^([Mm]onday|[Tt]uesday|[Ww]ednesday|[Tt]hursday|[Ff]riday|[Ss]aturday|[Ss]unday),?$' , 'DAY' ),
1669
1681
(r'^(Mon|Tue|Wed|Thu|Fri|Sat|Sun|May),?$' , 'NN' ),
1670
1682
1683
+ (r'^[Dd]ebugging$' , 'JUNK' ),
1684
+
1671
1685
# misc words that are not NNs
1672
1686
# lowercase verbs ending in "ing"
1673
1687
(r'^[a-z]+ing$' , 'NN' ),
@@ -1700,6 +1714,9 @@ def build_detection_from_node(
1700
1714
(r'^Moved$' , 'NN' ),
1701
1715
(r'^Phone$' , 'NN' ),
1702
1716
1717
+ (r'^Inputs?$' , 'NN' ),
1718
+
1719
+
1703
1720
# dual caps that are not NNP
1704
1721
(r'^Make[A-Z]' , 'JUNK' ),
1705
1722
(r'^Create[A-Z]' , 'JUNK' ),
@@ -2069,6 +2086,7 @@ def build_detection_from_node(
2069
2086
# and Spanish/French Da Silva and De Gaulle
2070
2087
(r'^(([Vv][ao]n)|[Dd][aeu])$' , 'VAN' ),
2071
2088
2089
+ (r'^aan$' , 'OF' ),
2072
2090
(r'^van$' , 'VAN' ),
2073
2091
(r'^Van$' , 'VAN' ),
2074
2092
(r'^von$' , 'VAN' ),
@@ -2289,6 +2307,9 @@ def build_detection_from_node(
2289
2307
# some punctuation combos
2290
2308
(r'^(?:=>|->|<-|<=)$' , 'JUNK' ),
2291
2309
2310
+ (r'^semiconductors?[\.,]?$' , 'NNP' ),
2311
+
2312
+
2292
2313
############################################################################
2293
2314
# catch all other as Nouns
2294
2315
############################################################################
@@ -2320,6 +2341,10 @@ def build_detection_from_node(
2320
2341
2321
2342
CD: {<BARE-YR>} #bareyear
2322
2343
2344
+ # 5 Jan 2003
2345
+ YR-RANGE: {<CD> <NNP> <YR-RANGE>} #72.3
2346
+
2347
+
2323
2348
#######################################
2324
2349
# All/No/Some Rights Reserved
2325
2350
#######################################
@@ -2344,6 +2369,9 @@ def build_detection_from_node(
2344
2369
# foo@bar.com or baz@bar.com
2345
2370
EMAIL: {<EMAIL> <NN> <EMAIL>} # email or email
2346
2371
2372
+ # <srinivasa.deevi at conexant dot com>
2373
+ EMAIL: {<EMAIL_START> <CC> <NN> <DOT> <NN> } #email with brackets
2374
+
2347
2375
#######################################
2348
2376
# NAMES and COMPANIES
2349
2377
#######################################
@@ -2559,18 +2587,21 @@ def build_detection_from_node(
2559
2587
NAME: {<NAME|NAME-EMAIL>+ <OF> <NNP> <OF> <NN>? <COMPANY>} #550
2560
2588
NAME: {<NAME|NAME-EMAIL>+ <CC|OF>? <NAME|NAME-EMAIL|COMPANY>} #560
2561
2589
2562
- NAME: {<NNP><NNP>} #5611
2590
+ NAME: {<NNP><NNP>} #561
2563
2591
2564
2592
# strip Software from Copyright (c) Ian Darwin 1995. Software
2565
- NAME-YEAR: {<NAME>+ <YR-RANGE>} #5611
2593
+ NAME-YEAR: {<NAME>+ <YR-RANGE>} #561.1
2566
2594
2567
2595
# Copyright 2018, OpenCensus Authors
2568
- COPYRIGHT: {<COPY>+ <YR-RANGE> <NNP> <AUTHS>} #1579991
2596
+ COPYRIGHT: {<COPY>+ <YR-RANGE> <NNP> <AUTHS>} #561.2
2597
+
2598
+ # Tom aan de Wiel
2599
+ NAME: {<NNP> <OF> <VAN> <NNP> } # 561.3
2569
2600
2570
- NAME-YEAR: {<YR-RANGE> <NNP>+ <CAPS>? <LINUX>?} #5612
2601
+ NAME-YEAR: {<YR-RANGE> <NNP>+ <CAPS>? <LINUX>?} #562
2571
2602
2572
2603
#Academy of Motion Picture Arts and Sciences
2573
- NAME: {<NAME> <CC> <NNP>} #561
2604
+ NAME: {<NAME> <CC> <NNP>} #563
2574
2605
2575
2606
# Adam Weinberger and the GNOME Foundation
2576
2607
ANDCO: {<CC> <NN> <COMPANY>} #565
@@ -2582,6 +2613,8 @@ def build_detection_from_node(
2582
2613
2583
2614
URL: {<PARENS> <URL> <PARENS>} #5700
2584
2615
2616
+ NAME-YEAR: {<NAME-YEAR> <CD> <NNP>} #5700.1
2617
+
2585
2618
#also accept trailing email and URLs
2586
2619
# and "VAN" e.g. Du: Copyright (c) 2008 Alek Du <alek.du@intel.com>
2587
2620
NAME-YEAR: {<NAME-YEAR> <VAN>? <EMAIL>?<URL>?} #5701
@@ -2984,7 +3017,11 @@ def build_detection_from_node(
2984
3017
2985
3018
# Russ Dill <Russ.Dill@asu.edu> 2001-2003
2986
3019
# Rewrited by Vladimir Oleynik <dzo@simtreas.ru> (C) 2003
2987
- COPYRIGHT: {<NAME-EMAIL> <YR-RANGE> <AUTH2> <BY> <NAME-EMAIL> <COPY> <YR-RANGE>} #22793.5
3020
+ COPYRIGHT: {<NAME-EMAIL> <YR-RANGE> <AUTH2> <BY> <NAME-EMAIL> <COPY> <YR-RANGE>} #2280-2
3021
+
3022
+ # Copyright (C) 2018
3023
+ # Author: Jeff LaBundy <jeff@labundy.com>
3024
+ COPYRIGHT: {<COPY> <COPY> <YR-RANGE> <AUTH> <NAME-EMAIL>} #2280-3
2988
3025
2989
3026
COPYRIGHT2: {<COPY>+ <NN|CAPS>? <YR-RANGE>+ <PN>*} #2280
2990
3027
@@ -3259,6 +3296,9 @@ def build_detection_from_node(
3259
3296
# copyrighted by the Open Source Vulnerability Database (http://osvdb.org)
3260
3297
COPYRIGHT: {<COPY> <BY> <NN|NNP>{3} <NAME>} #83002.1
3261
3298
3299
+ # (C) by the respective authors,
3300
+ <COPYRIGHT>: { <COPY> <BY> <NN> <NN> <AUTHDOT>} #83002.2
3301
+
3262
3302
# weird //opylefted by <-Harvie 2oo7
3263
3303
COPYRIGHT: {<COPY> <BY> <NN> <NN> <MAINT>?} #83003
3264
3304
@@ -3302,6 +3342,14 @@ def build_detection_from_node(
3302
3342
# Gracenote Software, copyright © 2000-2008 Gracenote.
3303
3343
COPYRIGHT: {<COMPANY> <COPY>{1,2} <NAME-YEAR>} #157999.12
3304
3344
3345
+ #Copyright (C) 2012-2016 by the following authors:
3346
+ #- Wladimir J. van der Laan <laanwj@gmail.com>
3347
+
3348
+ NAME-EMAIL: {<NNP> <NAME-EMAIL> } #157999.13
3349
+ NAME-EMAIL: {<DASH> <NAME-EMAIL> <NN>?} #157999.14
3350
+ COPYRIGHT: {<COPYRIGHT2> <FOLLOWING> <AUTHS> <NAME-EMAIL>+ } #157999.14
3351
+
3352
+
3305
3353
#######################################
3306
3354
# Copyright is held by ....
3307
3355
#######################################
@@ -3804,6 +3852,8 @@ def is_junk_copyright(s, patterns=COPYRIGHTS_JUNK_PATTERN_MATCHERS):
3804
3852
'$' ,
3805
3853
'current.year' ,
3806
3854
"©" ,
3855
+ 'author' ,
3856
+ 'authors' ,
3807
3857
])
3808
3858
))
3809
3859
0 commit comments