Skip to content

Commit c09cd3b

Browse files
committed
Merge branch 'develop'
2 parents c3053db + c53aac2 commit c09cd3b

File tree

11 files changed

+230
-27
lines changed

11 files changed

+230
-27
lines changed

CI.Jenkinsfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,10 @@ def runSonnarForPythonVersion(sourceDir, ver){
3434
pip3 install tox && \
3535
cd /source && \
3636
tox && \
37-
${sonarExec}\""
37+
${sonarExec} && \
38+
echo && \
39+
echo [INFO] Re-permission files for cleanup. && \
40+
chown -R 9960:9960 /source\""
3841
}
3942

4043
node ("docker-light") {

README.md

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,10 @@
1-
<a href="https://www.babelstreet.com/rosette"><img src="https://s3.amazonaws.com/styleguide.basistech.com/logos/rosette-logo.png" width="181" height="47" /></a>
2-
3-
---
1+
<a href="https://www.babelstreet.com/rosette"><img src="https://charts.babelstreet.com/icon.png" width="47" height="60"/></a>
2+
# Rosette by Babel Street
43

54
[![PyPI version](https://badge.fury.io/py/rosette-api.svg)](https://badge.fury.io/py/rosette-api)
65
[![Python Versions](https://img.shields.io/pypi/pyversions/rosette-api.svg?color=dark%20green&label=Python%20Versions)](https://img.shields.io/pypi/pyversions/rosette-api.svg?color=dark%20green&label=Python%20Versions)
76

8-
## Rosette API
9-
The Rosette Text Analytics Platform uses natural language processing, statistical modeling, and machine learning to
10-
analyze unstructured and semi-structured text across 364 language-encoding-script combinations, revealing valuable
11-
information and actionable data. Rosette provides endpoints for extracting entities and relationships, translating and
12-
comparing the similarity of names, categorizing and adding linguistic tags to text and more.
7+
Rosette uses natural language processing, statistical modeling, and machine learning to analyze unstructured and semi-structured text across hundreds of language-script combinations, revealing valuable information and actionable data. Rosette provides endpoints for extracting entities and relationships, translating and comparing the similarity of names, categorizing and adding linguistic tags to text and more. Rosette Server is the on-premises installation of Rosette, with access to Rosette's functions as RESTful web service endpoints. This solves cloud security worries and allows customization (models/indexes) as needed for your business.
138

149
## Rosette API Access
1510
- Rosette Cloud [Sign Up](https://developer.rosette.com/signup)
@@ -25,11 +20,10 @@ in the [examples](https://github.com/rosette-api/python/tree/develop/examples) d
2520

2621
#### Documentation & Support
2722
- [Binding API](https://rosette-api.github.io/python/)
28-
- [Rosette Platform API](https://developer.rosette.com/features-and-functions)
23+
- [Rosette Platform API](https://docs.babelstreet.com/API/en/index-en.html)
2924
- [Binding Release Notes](https://github.com/rosette-api/python/wiki/Release-Notes)
30-
- [Rosette Platform Release Notes](https://support.rosette.com/hc/en-us/articles/360018354971-Release-Notes)
31-
- [Binding/Rosette Platform Compatibility](https://developer.rosette.com/features-and-functions?python#)
32-
- [Support](https://support.rosette.com)
25+
- [Rosette Platform Release Notes](https://babelstreet.my.site.com/support/s/article/Rosette-Cloud-Release-Notes)
26+
- [Support](https://babelstreet.my.site.com/support/s/)
3327
- [Binding License: Apache 2.0](https://github.com/rosette-api/python/blob/develop/LICENSE.txt)
3428

3529
## Binding Developer Information

docs/source/conf.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,17 @@
4747

4848
# General information about the project.
4949
project = ''
50-
copyright = '2022, Basis Technology'
50+
copyright = '2024, Basis Technology'
5151
author = 'Basis Technology'
5252

5353
# The version info for the project you're documenting, acts as replacement for
5454
# |version| and |release|, also used in various other places throughout the
5555
# built documents.
5656
#
5757
# The short X.Y version.
58-
version = '1.28.0'
58+
version = '1.29.0'
5959
# The full version, including alpha/beta/rc tags.
60-
release = '1.28.0'
60+
release = '1.29.0'
6161

6262
# The language for content autogenerated by Sphinx. Refer to documentation
6363
# for a list of supported languages.

examples/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ python ping.py -k $API_KEY
3232
```
3333
git clone git@github.com:rosette-api/python.git
3434
cd python/examples
35-
docker run -it -v $(pwd):/source --entrypoint bash python:3.6-slim
35+
docker run -it -v $(pwd):/source --entrypoint bash python:3.12-slim
3636
cd /source
3737
pip install rosette_api
3838
python ping.py -k $API_KEY
@@ -42,7 +42,7 @@ python ping.py -k $API_KEY
4242
```
4343
git clone git@github.com:rosette-api/python.git
4444
cd python
45-
docker run -it -v $(pwd):/source --entrypoint bash python:3.6-slim
45+
docker run -it -v $(pwd):/source --entrypoint bash python:3.12-slim
4646
cd /source
4747
python setup.py install
4848
cd examples

examples/entities.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ def run(key, alt_url='https://api.rosette.com/rest/v1/'):
2222

2323
# api.set_option('calculateSalience','true')
2424
# api.set_option('linkEntities','false')
25+
# api.set_option('useIndocServer', True)
2526

2627
entities_text_data = "The Securities and Exchange Commission today announced the leadership of the agency’s trial unit. Bridget Fitzpatrick has been named Chief Litigation Counsel of the SEC and David Gottesman will continue to serve as the agency’s Deputy Chief Litigation Counsel. Since December 2016, Ms. Fitzpatrick and Mr. Gottesman have served as Co-Acting Chief Litigation Counsel. In that role, they were jointly responsible for supervising the trial unit at the agency’s Washington D.C. headquarters as well as coordinating with litigators in the SEC’s 11 regional offices around the country."
2728
params = DocumentParameters()

examples/events.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# -*- coding: utf-8 -*-
22
"""
3-
Example code to call Rosette API to get entities from a piece of text.
3+
Example code to call Rosette API to get events from a piece of text.
44
"""
55

66
import argparse

examples/record_similarity.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Example code to call Rosette API to get similarity scores between two lists of records.
4+
"""
5+
6+
import argparse
7+
import json
8+
import os
9+
10+
from rosette.api import API, RecordSimilarityParameters, RosetteException
11+
12+
13+
def run(key, alt_url='https://api.rosette.com/rest/v1/'):
14+
""" Run the example """
15+
# Create an API instance
16+
api = API(user_key=key, service_url=alt_url)
17+
18+
fields = {
19+
"primaryName": {
20+
"type": "rni_name",
21+
"weight": 0.5
22+
},
23+
"dob": {
24+
"type": "rni_date",
25+
"weight": 0.2
26+
},
27+
"addr": {
28+
"type": "rni_address",
29+
"weight": 0.5
30+
},
31+
"dob2": {
32+
"type": "rni_date",
33+
"weight": 0.1
34+
}
35+
}
36+
properties = {
37+
"threshold": 0.7,
38+
"includeExplainInfo": True
39+
}
40+
records = {
41+
"left": [
42+
{
43+
"primaryName": {
44+
"text": "Ethan R",
45+
"entityType": "PERSON",
46+
"language": "eng",
47+
"languageOfOrigin": "eng",
48+
"script": "Latn"
49+
},
50+
"dob": "1993-04-16",
51+
"addr": "123 Roadlane Ave",
52+
"dob2": {
53+
"date": "1993/04/16"
54+
}
55+
},
56+
{
57+
"dob": {
58+
"date": "1993-04-16"
59+
},
60+
"primaryName": {
61+
"text": "Evan R"
62+
}
63+
}
64+
],
65+
"right": [
66+
{
67+
"dob": {
68+
"date": "1993-04-16"
69+
},
70+
"primaryName": {
71+
"text": "Seth R",
72+
"language": "eng"
73+
}
74+
},
75+
{
76+
"primaryName": "Ivan R",
77+
"dob": {
78+
"date": "1993-04-16"
79+
},
80+
"addr": {
81+
"address": "123 Roadlane Ave"
82+
},
83+
"dob2": {
84+
"date": "1993/04/16"
85+
}
86+
}
87+
]
88+
}
89+
params = RecordSimilarityParameters()
90+
params["fields"] = fields
91+
params["properties"] = properties
92+
params["records"] = records
93+
94+
try:
95+
return api.record_similarity(params)
96+
except RosetteException as exception:
97+
print(exception)
98+
99+
100+
PARSER = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter,
101+
description='Calls the ' +
102+
os.path.splitext(os.path.basename(__file__))[0] + ' endpoint')
103+
PARSER.add_argument('-k', '--key', help='Rosette API Key', required=True)
104+
PARSER.add_argument('-u', '--url', help="Alternative API URL",
105+
default='https://api.rosette.com/rest/v1/')
106+
107+
if __name__ == '__main__':
108+
ARGS = PARSER.parse_args()
109+
RESULT = run(ARGS.key, ARGS.url)
110+
print(RESULT)

rosette/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@
1212
limitations under the License.
1313
"""
1414

15-
__version__ = '1.28.0'
15+
__version__ = '1.29.0'

rosette/api.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"""
44
Python client for the Rosette API.
55
6-
Copyright (c) 2014-2022 Basis Technology Corporation.
6+
Copyright (c) 2014-2024 Basis Technology Corporation.
77
88
Licensed under the Apache License, Version 2.0 (the "License");
99
you may not use this file except in compliance with the License.
@@ -30,7 +30,7 @@
3030

3131
_APPLICATION_JSON = 'application/json'
3232
_BINDING_LANGUAGE = 'python'
33-
_BINDING_VERSION = '1.28.0'
33+
_BINDING_VERSION = '1.29.0'
3434
_CONCURRENCY_HEADER = 'x-rosetteapi-concurrency'
3535
_CUSTOM_HEADER_PREFIX = 'X-RosetteAPI-'
3636
_CUSTOM_HEADER_PATTERN = re.compile('^' + _CUSTOM_HEADER_PREFIX)
@@ -345,6 +345,29 @@ def validate(self):
345345
repr("names"))
346346

347347

348+
class RecordSimilarityParameters(_RequestParametersBase):
349+
"""Parameter object for C{record-similarity} endpoint.
350+
Required:
351+
C{records} The records to be compared, where each left record is compared to the associated right record.
352+
C{properties} Parameters used in the call
353+
C{fields} The definition of the fields used in the comparison. There must be a minimum of 1 field and
354+
a maximum of 5 fields.
355+
"""
356+
357+
def __init__(self):
358+
self.use_multipart = False
359+
_RequestParametersBase.__init__(self, ("fields", "properties", "records"))
360+
361+
def validate(self):
362+
"""Internal. Do not use."""
363+
for option in "fields", "properties", "records": # required
364+
if self[option] is None:
365+
raise RosetteException(
366+
"missingParameter",
367+
"Required Record Similarity parameter is missing: " + option,
368+
repr(option))
369+
370+
348371
class EndpointCaller(object):
349372
"""L{EndpointCaller} objects are invoked via their instance methods to obtain results
350373
from the Rosette server described by the L{API} object from which they
@@ -592,7 +615,8 @@ def __init__(
592615
'TOKENS': 'tokens',
593616
'TOPICS': 'topics',
594617
'TRANSLITERATION': 'transliteration',
595-
'EVENTS': 'events'
618+
'EVENTS': 'events',
619+
'RECORD_SIMILARITY': 'record-similarity'
596620
}
597621

598622
def __del__(self):
@@ -966,6 +990,15 @@ def name_deduplication(self, parameters):
966990
@return: A python dictionary containing the results of de-duplication"""
967991
return EndpointCaller(self, self.endpoints['NAME_DEDUPLICATION']).call(parameters, NameDeduplicationParameters)
968992

993+
def record_similarity(self, parameters):
994+
"""
995+
Create an L{EndpointCaller} to get the similarity score between a list of records and call it.
996+
@param parameters: An object specifying the data,
997+
and possible metadata, to be processed by the record matcher.
998+
@type parameters: L{RecordSimilarityParameters}
999+
@return: A python dictionary containing the results of record matching."""
1000+
return EndpointCaller(self, self.endpoints['RECORD_SIMILARITY']).call(parameters, RecordSimilarityParameters)
1001+
9691002
def text_embedding(self, parameters):
9701003
""" deprecated
9711004
Create an L{EndpointCaller} to identify text vectors found in the texts

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
NAME = "rosette_api"
99
DESCRIPTION = "Rosette API Python client SDK"
10-
AUTHOR = "Basis Technology Corp."
11-
AUTHOR_EMAIL = "support@rosette.com"
10+
AUTHOR = "Rosette by Babel Street"
11+
AUTHOR_EMAIL = "helpdesk@babelstreet.com"
1212
HOMEPAGE = "https://github.com/rosette-api/python"
1313
VERSION = rosette.__version__
1414

tests/test_rosette_api.py

Lines changed: 64 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
22

33
"""
4-
Copyright (c) 2014-2022 Basis Technology Corporation.
4+
Copyright (c) 2014-2024 Basis Technology Corporation.
55
66
Licensed under the Apache License, Version 2.0 (the "License");
77
you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@
2929
NameTranslationParameters,
3030
NameSimilarityParameters,
3131
NameDeduplicationParameters,
32+
RecordSimilarityParameters,
3233
RosetteException)
3334

3435
_ISPY3 = sys.version_info[0] == 3
@@ -484,6 +485,10 @@ def test_the_name_requests_with_text(api, json_response):
484485
result = api.address_similarity("should fail")
485486
assert e_rosette.value.status == 'incompatible'
486487

488+
with pytest.raises(RosetteException) as e_rosette:
489+
result = api.record_similarity("should fail")
490+
assert e_rosette.value.status == 'incompatible'
491+
487492
httpretty.disable()
488493
httpretty.reset()
489494

@@ -958,4 +963,61 @@ def test_the_events_endpoint(api, json_response, doc_params):
958963
result = api.events(doc_params)
959964
assert result["name"] == "Rosette"
960965
httpretty.disable()
961-
httpretty.reset()
966+
httpretty.reset()
967+
968+
# Test the record similarity endpoint
969+
970+
971+
def test_the_record_similarity_endpoint(api, json_response):
972+
"""Test the record similarity endpoint"""
973+
httpretty.enable()
974+
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/record-similarity",
975+
body=json_response, status=200, content_type="application/json")
976+
977+
params = RecordSimilarityParameters()
978+
params["fields"] = {}
979+
params["properties"] = {}
980+
params["records"] = {}
981+
result = api.record_similarity(params)
982+
assert result["name"] == "Rosette"
983+
httpretty.disable()
984+
httpretty.reset()
985+
986+
987+
# Tests for required record-similarity parameters
988+
def test_for_record_similarity_required_parameters(api, json_response):
989+
"""Test record similarity parameters"""
990+
httpretty.enable()
991+
httpretty.register_uri(httpretty.POST, "https://api.rosette.com/rest/v1/record-similarity",
992+
body=json_response, status=200, content_type="application/json")
993+
994+
params = RecordSimilarityParameters()
995+
996+
with pytest.raises(RosetteException) as e_rosette:
997+
api.record_similarity(params)
998+
999+
assert e_rosette.value.status == 'missingParameter'
1000+
assert e_rosette.value.message == 'Required Record Similarity parameter is missing: fields'
1001+
1002+
params["fields"] = {}
1003+
1004+
with pytest.raises(RosetteException) as e_rosette:
1005+
api.record_similarity(params)
1006+
1007+
assert e_rosette.value.status == 'missingParameter'
1008+
assert e_rosette.value.message == 'Required Record Similarity parameter is missing: properties'
1009+
1010+
params["properties"] = {}
1011+
1012+
with pytest.raises(RosetteException) as e_rosette:
1013+
api.record_similarity(params)
1014+
1015+
assert e_rosette.value.status == 'missingParameter'
1016+
assert e_rosette.value.message == 'Required Record Similarity parameter is missing: records'
1017+
1018+
params["records"] = {}
1019+
1020+
result = api.record_similarity(params)
1021+
assert result["name"] == "Rosette"
1022+
httpretty.disable()
1023+
httpretty.reset()

0 commit comments

Comments
 (0)