Skip to content

Commit 08910f5

Browse files
author
Morgan Fouesneau
committed
Author filter with initials
1 parent 1dd7052 commit 08910f5

File tree

3 files changed

+262
-210
lines changed

3 files changed

+262
-210
lines changed

docs/MPIA daily digest.ipynb

Lines changed: 21 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -69,44 +69,6 @@
6969
"warnings.simplefilter('always', AffiliationWarning)"
7070
]
7171
},
72-
{
73-
"cell_type": "code",
74-
"execution_count": null,
75-
"id": "14622700",
76-
"metadata": {},
77-
"outputs": [],
78-
"source": [
79-
"# additional CSS to consider. \n",
80-
"# TODO: Current into each exported file. \n",
81-
"# This should be set once into the webpages directly.\n",
82-
"# With only the class/id definitions in the .md files.\n",
83-
"\n",
84-
"debug_html = \"\"\"\n",
85-
"<style>\n",
86-
"#wrap{ overflow:auto; }\n",
87-
"#fig1{ background:yellow; width:100%; float:left; padding:5px; }\n",
88-
"#fig2{ background:red; width:50%; float:left; clear:left; padding:5px; }\n",
89-
"#fig3{ background:green; width:50%; float:left; padding:5px; }\n",
90-
".macros{ background:yellow; visibility:visible;}\n",
91-
"h1 {margin: 0 0 0 0;}\n",
92-
"mark {background-color:#fff3b6;}\n",
93-
"img {object-fit:contain; max-height:250px; display:inline-block; text-align: center;}\n",
94-
"</style>\n",
95-
"\"\"\" \n",
96-
"html = \"\"\"\n",
97-
"<style>\n",
98-
"#wrap{ overflow:auto; }\n",
99-
"#fig1{ width:100%; float:left; padding: 5px; }\n",
100-
"#fig2{ width:50%; float:left; clear:left; padding: 5px; }\n",
101-
"#fig3{ width:50%; float:left; padding: 5px; }\n",
102-
".macros{ visibility:hidden; height:0px; }\n",
103-
"h1 {margin: 0em 0 0 0;}\n",
104-
"mark {background-color:#fff3b6;}\n",
105-
"img {object-fit:contain; max-height:250px; display:inline-block; text-align: center;}\n",
106-
"</style>\n",
107-
"\"\"\""
108-
]
109-
},
11072
{
11173
"cell_type": "markdown",
11274
"id": "14bd6310",
@@ -128,20 +90,21 @@
12890
"# get list from MPIA website\n",
12991
"# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`\n",
13092
"mpia_authors = mpia.get_mpia_mitarbeiter_list()\n",
93+
"normed_mpia_authors = [k[1] for k in mpia_authors] # initials + fullname\n",
13194
"new_papers = get_new_papers()\n",
13295
"# add manual references\n",
13396
"add_paper_refs = []\n",
13497
"new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])\n",
13598
"\n",
136-
"# select only papers with matching author names and highlight authors\n",
137-
"hl_list = [k[0] for k in mpia_authors]\n",
138-
"\n",
13999
"candidates = []\n",
140100
"for paperk in new_papers:\n",
141-
" hl_authors = highlight_authors_in_list(paperk['authors'], hl_list)\n",
101+
" # Check author list with their initials\n",
102+
" normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]\n",
103+
" hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)\n",
142104
" matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]\n",
143105
" paperk['authors'] = hl_authors\n",
144106
" if matches:\n",
107+
" # only select paper if an author matched our list\n",
145108
" candidates.append(paperk)\n",
146109
"print(\"\"\"Arxiv has {0:,d} new papers today\"\"\".format(len(new_papers))) \n",
147110
"print(\"\"\" {0:,d} with possible author matches\"\"\".format(len(candidates)))"
@@ -201,7 +164,10 @@
201164
" doc.comment = (get_markdown_badge(paper_id) + \n",
202165
" \"<mark>Appeared on: \" + paper['date'] + \"</mark> - \" +\n",
203166
" \"_\" + paper['comments'] + \"_\")\n",
204-
" doc.highlight_authors_in_list(hl_list)\n",
167+
" # highlight authors (FIXME: doc.highlight_authors)\n",
168+
" doc._authors = highlight_authors_in_list(\n",
169+
" [mpia.get_initials(k) for k in doc.authors], \n",
170+
" normed_mpia_authors, verbose=True)\n",
205171
"\n",
206172
" full_md = doc.generate_markdown_text()\n",
207173
" \n",
@@ -373,18 +339,10 @@
373339
},
374340
{
375341
"cell_type": "code",
376-
"execution_count": 4,
342+
"execution_count": null,
377343
"id": "cf665672",
378344
"metadata": {},
379-
"outputs": [
380-
{
381-
"name": "stdout",
382-
"output_type": "stream",
383-
"text": [
384-
"184 publications files modified in the last 7 days.\n"
385-
]
386-
}
387-
],
345+
"outputs": [],
388346
"source": [
389347
"from datetime import datetime, timedelta, timezone\n",
390348
"from glob import glob\n",
@@ -408,18 +366,10 @@
408366
},
409367
{
410368
"cell_type": "code",
411-
"execution_count": 5,
369+
"execution_count": null,
412370
"id": "015de740",
413371
"metadata": {},
414-
"outputs": [
415-
{
416-
"name": "stdout",
417-
"output_type": "stream",
418-
"text": [
419-
"3 publications in the last 7 days.\n"
420-
]
421-
}
422-
],
372+
"outputs": [],
423373
"source": [
424374
"import datetime\n",
425375
"from glob import glob\n",
@@ -462,7 +412,7 @@
462412
},
463413
{
464414
"cell_type": "code",
465-
"execution_count": 6,
415+
"execution_count": null,
466416
"id": "52ca0208",
467417
"metadata": {
468418
"scrolled": false
@@ -495,7 +445,7 @@
495445
},
496446
{
497447
"cell_type": "code",
498-
"execution_count": 7,
448+
"execution_count": null,
499449
"id": "a6eac5b6",
500450
"metadata": {
501451
"scrolled": false
@@ -519,18 +469,10 @@
519469
},
520470
{
521471
"cell_type": "code",
522-
"execution_count": 10,
472+
"execution_count": null,
523473
"id": "adc1a1ec",
524474
"metadata": {},
525-
"outputs": [
526-
{
527-
"name": "stdout",
528-
"output_type": "stream",
529-
"text": [
530-
"1 publications in the last day.\n"
531-
]
532-
}
533-
],
475+
"outputs": [],
534476
"source": [
535477
"# redo for today\n",
536478
"days = 1\n",
@@ -557,20 +499,12 @@
557499
},
558500
{
559501
"cell_type": "code",
560-
"execution_count": 29,
502+
"execution_count": null,
561503
"id": "00eece82",
562504
"metadata": {
563505
"scrolled": false
564506
},
565-
"outputs": [
566-
{
567-
"name": "stdout",
568-
"output_type": "stream",
569-
"text": [
570-
"6 6 publications selected.\n"
571-
]
572-
}
573-
],
507+
"outputs": [],
574508
"source": [
575509
"# Create the flat grid of the last N papers (fixed number regardless of dates)\n",
576510
"from itertools import islice \n",
@@ -598,7 +532,7 @@
598532
},
599533
{
600534
"cell_type": "code",
601-
"execution_count": 20,
535+
"execution_count": null,
602536
"id": "cab45692",
603537
"metadata": {},
604538
"outputs": [],
@@ -629,7 +563,7 @@
629563
"name": "python",
630564
"nbconvert_exporter": "python",
631565
"pygments_lexer": "ipython3",
632-
"version": "3.11.2"
566+
"version": "3.10.10"
633567
},
634568
"vscode": {
635569
"interpreter": {

0 commit comments

Comments
 (0)