-#!/usr/bin/python
+#!/usr/bin/python3.6
 # -*- coding: utf-8 -*-
 
-#Example call on localhost:
-#http://localhost/coptic-nlp/api.py?data=%E2%B2%81%CF%A5%E2%B2%A5%E2%B2%B1%E2%B2%A7%E2%B2%99%20%E2%B2%9B%CF%AD%E2%B2%93%E2%B2%A1%E2%B2%A3%E2%B2%B1%E2%B2%99%E2%B2%89&lb=line
-
-from nlp_form import nlp_coptic
-import cgi, sys, re
+import requests
+import cgi, sys
 
 PY3 = sys.version_info[0] == 3
 if PY3:
-	sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)
+	sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)
 
 storage = cgi.FieldStorage()
-#storage = {"data":"ⲁϥⲥⲱⲧⲙ ⲛϭⲓⲡⲣⲱⲙⲉ"}
+#storage = {"data": "ⲁϥⲥⲱⲧⲙ ⲛϭⲓⲡⲣⲱⲙⲉ"}
 if "data" in storage:
-	#data = storage["data"]
-	data = storage.getvalue("data")
+	#data = storage["data"]
+	data = storage.getvalue("data")
 else:
-	data = ""
+	data = ""
 
 # Diagnose detokenization needs
-detok = 0
-segment_merged = False
-orig_chars = ["̈", "", "̄", "̀", "̣", "`", "̅", "̈", "̂", "︤", "︥", "︦", "⳿", "~", "\n", "[", "]", "̇", "᷍", "⸍", "›", "‹"]
-clean = "".join([c for c in data if c not in orig_chars])
-clean = re.sub(r'<[^<>]+>','',clean).replace(" ","_").replace("\n","").lower()
-preps = clean.count("_ϩⲛ_") + clean.count("_ⲙⲛ_")
-if preps > 4:
-	detok = 1
-	segment_merged = True
-
 if "lb" in storage:
-	line = storage.getvalue("lb")
+	line = storage.getvalue("lb")
 else:
-	if "<lb" in data:
-		line = "noline"
-	else:
-		line = "line"
+	if "<lb" in data:
+		line = "noline"
+	else:
+		line = "line"
 
 if "format" in storage:
-	format = storage.getvalue("format")
+	format = storage.getvalue("format")
 else:
-	format = "sgml"
+	format = "sgml_no_parse"
 
 if format not in ["conll", "pipes", "sgml_no_parse", "sgml_entities"]:
-	format = "sgml"
-
-if format == "pipes":
-	print("Content-Type: text/plain; charset=UTF-8\n")
-	processed = nlp_coptic(data,lb=line=="line",sgml_mode="pipes",do_tok=True, detokenize=detok, segment_merged=segment_merged)
-	if "</lb>" in processed:
-		processed = processed.replace("</lb>","</lb>\n")
-	print(processed.strip())
-elif format == "sgml_no_parse":
-	print("Content-Type: text/sgml; charset=UTF-8\n")
-	# secure call, note that htaccess prevents this running without authentication
-	if "|" in data:
-		processed = nlp_coptic(data, lb=line=="line", parse_only=False, do_tok=True, do_mwe=False,
-							do_norm=True, do_tag=True, do_lemma=True, do_lang=True,
-							do_milestone=True, do_parse=("no_parse" not in format), sgml_mode="sgml",
-							tok_mode="from_pipes", old_tokenizer=False)
-	else:
-		processed = nlp_coptic(data, lb=line=="line", parse_only=False, do_tok=True, do_mwe=False,
-							do_norm=True, do_tag=True, do_lemma=True, do_lang=True,
-							do_milestone=True, do_parse=("no_parse" not in format), sgml_mode="sgml",
-							tok_mode="auto", old_tokenizer=False)
-	print(processed.strip() + "\n")
-elif format == "sgml_entities":
-	print("Content-Type: text/sgml; charset=UTF-8\n\n")
-	# secure call, note that htaccess prevents this running without authentication
-	processed = nlp_coptic(data, lb=line == "line", parse_only=False, do_tok=False, do_mwe=False,
-						do_norm=False, do_tag=False, do_lemma=False, do_lang=False, sent_tag="translation",
-						do_milestone=True, do_parse=False, sgml_mode="sgml", merge_parse=True,
-						tok_mode="auto", old_tokenizer=False, do_entities=True, pos_spans=True, preloaded={"stk":"","xrenner":None})
-	print(processed.strip() + "\n")
-elif format != "conll":
-	print("Content-Type: text/"+format+"; charset=UTF-8\n")
-	processed = nlp_coptic(data,lb=line=="line")
-	print("<doc>\n"+processed.strip()+"\n</doc>\n")
+	format = "sgml_no_parse"
 
+if "sgml" in format:
+	print("Content-Type: text/sgml; charset=UTF-8\n")
 else:
-	print("Content-Type: text/plain; charset=UTF-8\n")
-	processed = nlp_coptic(data,lb=False,parse_only=True,do_tok=True,do_tag=True)
-	print(processed.strip())
+	print("Content-Type: text/plain; charset=UTF-8\n")
 
+params = {"data":data,"lb":line,"format":format}
+result = requests.post("http://localhost:5555/",params=params)
+print(result.content.decode("utf8"))
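For context, the forwarding target on localhost:5555 is not shown in this diff. The sketch below is one way such a service could look, assuming it is a small long-running web app that wraps nlp_coptic (which api.py previously imported directly) and reads the same data/lb/format parameters that api.py now forwards. The Flask framework, the route, and the branching on format are illustrative assumptions; the nlp_coptic calls themselves are copied from the removed branches above.

# Hypothetical sketch only -- the real service behind localhost:5555 is not part of this diff.
# Assumed: it wraps nlp_coptic with the same arguments the old api.py used, and reads the
# data/lb/format values that api.py forwards (requests' params= puts them in the query
# string, even for a POST).
from flask import Flask, request
from nlp_form import nlp_coptic

app = Flask(__name__)

@app.route("/", methods=["GET", "POST"])
def analyze():
	data = request.args.get("data", "")
	line = request.args.get("lb", "line")
	format = request.args.get("format", "sgml_no_parse")
	if format == "conll":
		# mirrors the old parse-only branch of api.py
		return nlp_coptic(data, lb=False, parse_only=True, do_tok=True, do_tag=True).strip()
	# mirrors the old sgml_no_parse branch (full pipeline without syntactic parsing)
	processed = nlp_coptic(data, lb=line == "line", parse_only=False, do_tok=True, do_mwe=False,
						do_norm=True, do_tag=True, do_lemma=True, do_lang=True,
						do_milestone=True, do_parse=False, sgml_mode="sgml",
						tok_mode="auto", old_tokenizer=False)
	return processed.strip() + "\n"

if __name__ == "__main__":
	app.run(port=5555)

One plausible motivation for the change is that a persistent service only imports the NLP pipeline and loads its models once, while api.py stays a thin CGI pass-through: a request like api.py?data=...&lb=line&format=conll is relayed unchanged to the service and its response is printed back to the client.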