|
69 | 69 | "warnings.simplefilter('always', AffiliationWarning)"
|
70 | 70 | ]
|
71 | 71 | },
|
72 |
| - { |
73 |
| - "cell_type": "code", |
74 |
| - "execution_count": null, |
75 |
| - "id": "14622700", |
76 |
| - "metadata": {}, |
77 |
| - "outputs": [], |
78 |
| - "source": [ |
79 |
| - "# additional CSS to consider. \n", |
80 |
| - "# TODO: Current into each exported file. \n", |
81 |
| - "# This should be set once into the webpages directly.\n", |
82 |
| - "# With only the class/id definitions in the .md files.\n", |
83 |
| - "\n", |
84 |
| - "debug_html = \"\"\"\n", |
85 |
| - "<style>\n", |
86 |
| - "#wrap{ overflow:auto; }\n", |
87 |
| - "#fig1{ background:yellow; width:100%; float:left; padding:5px; }\n", |
88 |
| - "#fig2{ background:red; width:50%; float:left; clear:left; padding:5px; }\n", |
89 |
| - "#fig3{ background:green; width:50%; float:left; padding:5px; }\n", |
90 |
| - ".macros{ background:yellow; visibility:visible;}\n", |
91 |
| - "h1 {margin: 0 0 0 0;}\n", |
92 |
| - "mark {background-color:#fff3b6;}\n", |
93 |
| - "img {object-fit:contain; max-height:250px; display:inline-block; text-align: center;}\n", |
94 |
| - "</style>\n", |
95 |
| - "\"\"\" \n", |
96 |
| - "html = \"\"\"\n", |
97 |
| - "<style>\n", |
98 |
| - "#wrap{ overflow:auto; }\n", |
99 |
| - "#fig1{ width:100%; float:left; padding: 5px; }\n", |
100 |
| - "#fig2{ width:50%; float:left; clear:left; padding: 5px; }\n", |
101 |
| - "#fig3{ width:50%; float:left; padding: 5px; }\n", |
102 |
| - ".macros{ visibility:hidden; height:0px; }\n", |
103 |
| - "h1 {margin: 0em 0 0 0;}\n", |
104 |
| - "mark {background-color:#fff3b6;}\n", |
105 |
| - "img {object-fit:contain; max-height:250px; display:inline-block; text-align: center;}\n", |
106 |
| - "</style>\n", |
107 |
| - "\"\"\"" |
108 |
| - ] |
109 |
| - }, |
110 | 72 | {
|
111 | 73 | "cell_type": "markdown",
|
112 | 74 | "id": "14bd6310",
|
|
128 | 90 | "# get list from MPIA website\n",
|
129 | 91 | "# it automatically filters identified non-scientists :func:`mpia.filter_non_scientists`\n",
|
130 | 92 | "mpia_authors = mpia.get_mpia_mitarbeiter_list()\n",
|
| 93 | + "normed_mpia_authors = [k[1] for k in mpia_authors] # initials + fullname\n", |
131 | 94 | "new_papers = get_new_papers()\n",
|
132 | 95 | "# add manual references\n",
|
133 | 96 | "add_paper_refs = []\n",
|
134 | 97 | "new_papers.extend([get_paper_from_identifier(k) for k in add_paper_refs])\n",
|
135 | 98 | "\n",
|
136 |
| - "# select only papers with matching author names and highlight authors\n", |
137 |
| - "hl_list = [k[0] for k in mpia_authors]\n", |
138 |
| - "\n", |
139 | 99 | "candidates = []\n",
|
140 | 100 | "for paperk in new_papers:\n",
|
141 |
| - " hl_authors = highlight_authors_in_list(paperk['authors'], hl_list)\n", |
| 101 | + " # Check author list with their initials\n", |
| 102 | + " normed_author_list = [mpia.get_initials(k) for k in paperk['authors']]\n", |
| 103 | + " hl_authors = highlight_authors_in_list(normed_author_list, normed_mpia_authors, verbose=True)\n", |
142 | 104 | " matches = [(hl, orig) for hl, orig in zip(hl_authors, paperk['authors']) if 'mark' in hl]\n",
|
143 | 105 | " paperk['authors'] = hl_authors\n",
|
144 | 106 | " if matches:\n",
|
| 107 | + " # only select paper if an author matched our list\n", |
145 | 108 | " candidates.append(paperk)\n",
|
146 | 109 | "print(\"\"\"Arxiv has {0:,d} new papers today\"\"\".format(len(new_papers))) \n",
|
147 | 110 | "print(\"\"\" {0:,d} with possible author matches\"\"\".format(len(candidates)))"
|
|
201 | 164 | " doc.comment = (get_markdown_badge(paper_id) + \n",
|
202 | 165 | " \"<mark>Appeared on: \" + paper['date'] + \"</mark> - \" +\n",
|
203 | 166 | " \"_\" + paper['comments'] + \"_\")\n",
|
204 |
| - " doc.highlight_authors_in_list(hl_list)\n", |
| 167 | + " # highlight authors (FIXME: doc.highlight_authors)\n", |
| 168 | + " doc._authors = highlight_authors_in_list(\n", |
| 169 | + " [mpia.get_initials(k) for k in doc.authors], \n", |
| 170 | + " normed_mpia_authors, verbose=True)\n", |
205 | 171 | "\n",
|
206 | 172 | " full_md = doc.generate_markdown_text()\n",
|
207 | 173 | " \n",
|
|
373 | 339 | },
|
374 | 340 | {
|
375 | 341 | "cell_type": "code",
|
376 |
| - "execution_count": 4, |
| 342 | + "execution_count": null, |
377 | 343 | "id": "cf665672",
|
378 | 344 | "metadata": {},
|
379 |
| - "outputs": [ |
380 |
| - { |
381 |
| - "name": "stdout", |
382 |
| - "output_type": "stream", |
383 |
| - "text": [ |
384 |
| - "184 publications files modified in the last 7 days.\n" |
385 |
| - ] |
386 |
| - } |
387 |
| - ], |
| 345 | + "outputs": [], |
388 | 346 | "source": [
|
389 | 347 | "from datetime import datetime, timedelta, timezone\n",
|
390 | 348 | "from glob import glob\n",
|
|
408 | 366 | },
|
409 | 367 | {
|
410 | 368 | "cell_type": "code",
|
411 |
| - "execution_count": 5, |
| 369 | + "execution_count": null, |
412 | 370 | "id": "015de740",
|
413 | 371 | "metadata": {},
|
414 |
| - "outputs": [ |
415 |
| - { |
416 |
| - "name": "stdout", |
417 |
| - "output_type": "stream", |
418 |
| - "text": [ |
419 |
| - "3 publications in the last 7 days.\n" |
420 |
| - ] |
421 |
| - } |
422 |
| - ], |
| 372 | + "outputs": [], |
423 | 373 | "source": [
|
424 | 374 | "import datetime\n",
|
425 | 375 | "from glob import glob\n",
|
|
462 | 412 | },
|
463 | 413 | {
|
464 | 414 | "cell_type": "code",
|
465 |
| - "execution_count": 6, |
| 415 | + "execution_count": null, |
466 | 416 | "id": "52ca0208",
|
467 | 417 | "metadata": {
|
468 | 418 | "scrolled": false
|
|
495 | 445 | },
|
496 | 446 | {
|
497 | 447 | "cell_type": "code",
|
498 |
| - "execution_count": 7, |
| 448 | + "execution_count": null, |
499 | 449 | "id": "a6eac5b6",
|
500 | 450 | "metadata": {
|
501 | 451 | "scrolled": false
|
|
519 | 469 | },
|
520 | 470 | {
|
521 | 471 | "cell_type": "code",
|
522 |
| - "execution_count": 10, |
| 472 | + "execution_count": null, |
523 | 473 | "id": "adc1a1ec",
|
524 | 474 | "metadata": {},
|
525 |
| - "outputs": [ |
526 |
| - { |
527 |
| - "name": "stdout", |
528 |
| - "output_type": "stream", |
529 |
| - "text": [ |
530 |
| - "1 publications in the last day.\n" |
531 |
| - ] |
532 |
| - } |
533 |
| - ], |
| 475 | + "outputs": [], |
534 | 476 | "source": [
|
535 | 477 | "# redo for today\n",
|
536 | 478 | "days = 1\n",
|
|
557 | 499 | },
|
558 | 500 | {
|
559 | 501 | "cell_type": "code",
|
560 |
| - "execution_count": 29, |
| 502 | + "execution_count": null, |
561 | 503 | "id": "00eece82",
|
562 | 504 | "metadata": {
|
563 | 505 | "scrolled": false
|
564 | 506 | },
|
565 |
| - "outputs": [ |
566 |
| - { |
567 |
| - "name": "stdout", |
568 |
| - "output_type": "stream", |
569 |
| - "text": [ |
570 |
| - "6 6 publications selected.\n" |
571 |
| - ] |
572 |
| - } |
573 |
| - ], |
| 507 | + "outputs": [], |
574 | 508 | "source": [
|
575 | 509 | "# Create the flat grid of the last N papers (fixed number regardless of dates)\n",
|
576 | 510 | "from itertools import islice \n",
|
|
598 | 532 | },
|
599 | 533 | {
|
600 | 534 | "cell_type": "code",
|
601 |
| - "execution_count": 20, |
| 535 | + "execution_count": null, |
602 | 536 | "id": "cab45692",
|
603 | 537 | "metadata": {},
|
604 | 538 | "outputs": [],
|
|
629 | 563 | "name": "python",
|
630 | 564 | "nbconvert_exporter": "python",
|
631 | 565 | "pygments_lexer": "ipython3",
|
632 |
| - "version": "3.11.2" |
| 566 | + "version": "3.10.10" |
633 | 567 | },
|
634 | 568 | "vscode": {
|
635 | 569 | "interpreter": {
|
|
0 commit comments