@@ -1159,6 +1159,87 @@ def update_training_query(self,
1159
1159
response = self .send (request )
1160
1160
return response
1161
1161
1162
+ #########################
1163
+ # analyze
1164
+ #########################
1165
+
1166
def analyze_document(self,
                     project_id: str,
                     collection_id: str,
                     *,
                     file: BinaryIO = None,
                     filename: str = None,
                     file_content_type: str = None,
                     metadata: str = None,
                     **kwargs) -> 'DetailedResponse':
    """
    Analyze a Document.

    Process a document using the specified collection's settings and return it for
    realtime use.
    **Note:** Documents processed using this method are not added to the specified
    collection.
    **Note:** This method is only supported on IBM Cloud Pak for Data instances of
    Discovery.

    :param str project_id: The ID of the project. This information can be found
           from the deploy page of the Discovery administrative tooling.
    :param str collection_id: The ID of the collection.
    :param BinaryIO file: (optional) The content of the document to ingest. The
           maximum supported file size when adding a file to a collection is 50
           megabytes, the maximum supported file size when testing a configuration is
           1 megabyte. Files larger than the supported size are rejected.
    :param str filename: (optional) The filename for file. When omitted, the
           base name of `file.name` is used if the file object has a `name`.
    :param str file_content_type: (optional) The content type of file. Falls
           back to `application/octet-stream` when not given.
    :param str metadata: (optional) The maximum supported metadata file size is
           1 MB. Metadata parts larger than 1 MB are rejected.
           Example:  ``` {
             "Creator": "Johnny Appleseed",
             "Subject": "Apples"
           } ```.
    :param dict headers: A `dict` containing the request headers
    :return: A `DetailedResponse` containing the result, headers and HTTP status code.
    :rtype: DetailedResponse
    :raises ValueError: if `project_id` or `collection_id` is `None`, or if a
            file is supplied and no filename can be determined for it.
    """

    if project_id is None:
        raise ValueError('project_id must be provided')
    if collection_id is None:
        raise ValueError('collection_id must be provided')

    headers = {}
    # Treat an explicit ``headers=None`` like "no extra headers" instead of
    # letting dict.update(None) raise a TypeError.
    extra_headers = kwargs.get('headers')
    if extra_headers is not None:
        headers.update(extra_headers)
    # SDK headers are merged last, so they win over caller-supplied headers
    # that use the same key.
    sdk_headers = get_sdk_headers(service_name=self.DEFAULT_SERVICE_NAME,
                                  service_version='V2',
                                  operation_id='analyze_document')
    headers.update(sdk_headers)

    params = {'version': self.version}

    # Multipart form parts, each as (field name, (filename, content, type)).
    form_data = []
    if file:
        if not filename and hasattr(file, 'name'):
            filename = basename(file.name)
        if not filename:
            raise ValueError('filename must be provided')
        form_data.append(('file', (filename, file, file_content_type or
                                   'application/octet-stream')))
    if metadata:
        metadata = str(metadata)
        form_data.append(('metadata', (None, metadata, 'text/plain')))

    url = '/v2/projects/{0}/collections/{1}/analyze'.format(
        *self._encode_path_vars(project_id, collection_id))
    request = self.prepare_request(method='POST',
                                   url=url,
                                   headers=headers,
                                   params=params,
                                   files=form_data)

    response = self.send(request)
    return response
1242
+
1162
1243
#########################
1163
1244
# enrichments
1164
1245
#########################
@@ -1224,7 +1305,6 @@ def create_enrichment(self,
1224
1305
if enrichment is None :
1225
1306
raise ValueError ('enrichment must be provided' )
1226
1307
1227
- print (enrichment )
1228
1308
headers = {}
1229
1309
if 'headers' in kwargs :
1230
1310
headers .update (kwargs .get ('headers' ))
@@ -1662,11 +1742,173 @@ class FileContentType(Enum):
1662
1742
APPLICATION_XHTML_XML = 'application/xhtml+xml'
1663
1743
1664
1744
1745
class AnalyzeDocumentEnums:
    """
    Namespace for the enumerated values associated with analyze_document.
    """

    class FileContentType(Enum):
        """
        The content type of file.
        """
        APPLICATION_JSON = 'application/json'
        APPLICATION_MSWORD = 'application/msword'
        APPLICATION_VND_OPENXMLFORMATS_OFFICEDOCUMENT_WORDPROCESSINGML_DOCUMENT = (
            'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )
        APPLICATION_PDF = 'application/pdf'
        TEXT_HTML = 'text/html'
        APPLICATION_XHTML_XML = 'application/xhtml+xml'
1757
+
1758
+
1665
1759
##############################################################################
1666
1760
# Models
1667
1761
##############################################################################
1668
1762
1669
1763
1764
class AnalyzedDocument():
    """
    An object containing the converted document and any identified enrichments.

    :attr List[Notice] notices: (optional) Array of document results that match the
          query.
    :attr AnalyzedResult result: (optional) Result of the document analysis.
    """

    def __init__(self,
                 *,
                 notices: List['Notice'] = None,
                 result: 'AnalyzedResult' = None) -> None:
        """
        Initialize a AnalyzedDocument object.

        :param List[Notice] notices: (optional) Array of document results that
               match the query.
        :param AnalyzedResult result: (optional) Result of the document analysis.
        """
        self.notices = notices
        self.result = result

    @classmethod
    def from_dict(cls, _dict: Dict) -> 'AnalyzedDocument':
        """
        Initialize a AnalyzedDocument object from a json dictionary.

        :param Dict _dict: Dictionary that may contain `notices` and `result`.
               A value of `None` (JSON `null`) is treated like a missing key.
        :return: The populated `AnalyzedDocument`.
        :raises ValueError: if `_dict` contains any key other than `notices`
                or `result`.
        """
        args = {}
        valid_keys = ['notices', 'result']
        bad_keys = set(_dict.keys()) - set(valid_keys)
        if bad_keys:
            # Sort the offending keys so the message is reproducible; plain
            # set iteration order differs between interpreter runs.
            raise ValueError(
                'Unrecognized keys detected in dictionary for class AnalyzedDocument: '
                + ', '.join(sorted(str(key) for key in bad_keys)))
        notices = _dict.get('notices')
        if notices is not None:
            args['notices'] = [Notice._from_dict(x) for x in notices]
        result = _dict.get('result')
        if result is not None:
            args['result'] = AnalyzedResult._from_dict(result)
        return cls(**args)

    @classmethod
    def _from_dict(cls, _dict):
        """Initialize a AnalyzedDocument object from a json dictionary."""
        return cls.from_dict(_dict)

    def to_dict(self) -> Dict:
        """
        Return a json dictionary representing this model.

        Attributes that are `None` are left out of the result.
        """
        _dict = {}
        if hasattr(self, 'notices') and self.notices is not None:
            _dict['notices'] = [x._to_dict() for x in self.notices]
        if hasattr(self, 'result') and self.result is not None:
            _dict['result'] = self.result._to_dict()
        return _dict

    def _to_dict(self):
        """Return a json dictionary representing this model."""
        return self.to_dict()

    def __str__(self) -> str:
        """Return a `str` version of this AnalyzedDocument object."""
        return json.dumps(self._to_dict(), indent=2)

    def __eq__(self, other: 'AnalyzedDocument') -> bool:
        """Return `true` when self and other are equal, false otherwise."""
        if not isinstance(other, self.__class__):
            return False
        return self.__dict__ == other.__dict__

    def __ne__(self, other: 'AnalyzedDocument') -> bool:
        """Return `true` when self and other are not equal, false otherwise."""
        return not self == other
1836
+
1837
+
1838
class AnalyzedResult():
    """
    Result of the document analysis.

    Besides `metadata`, any other keyword passed to the constructor (or any
    other attribute assigned later) is stored as an additional property and
    included in `to_dict()`.

    :attr dict metadata: (optional) Metadata of the document.
    """

    def __init__(self, *, metadata: dict = None, **kwargs) -> None:
        """
        Initialize a AnalyzedResult object.

        :param dict metadata: (optional) Metadata of the document.
        :param **kwargs: (optional) Any additional properties.
        """
        self.metadata = metadata
        for _key, _value in kwargs.items():
            setattr(self, _key, _value)

    @classmethod
    def from_dict(cls, _dict: Dict) -> 'AnalyzedResult':
        """
        Initialize a AnalyzedResult object from a json dictionary.

        Every key other than `metadata` becomes an additional property. The
        input dictionary itself is not modified.
        """
        args = {}
        # Work on a copy so removing 'metadata' does not touch the caller's dict.
        xtra = _dict.copy()
        if 'metadata' in _dict:
            args['metadata'] = _dict.get('metadata')
            del xtra['metadata']
        args.update(xtra)
        return cls(**args)

    @classmethod
    def _from_dict(cls, _dict):
        """Initialize a AnalyzedResult object from a json dictionary."""
        return cls.from_dict(_dict)

    def to_dict(self) -> Dict:
        """
        Return a json dictionary representing this model.

        `metadata` comes first, followed by the additional properties in
        sorted name order. Entries whose value is `None` are omitted.
        """
        _dict = {}
        if hasattr(self, 'metadata') and self.metadata is not None:
            _dict['metadata'] = self.metadata
        # The names live in a set, whose iteration order for strings changes
        # between interpreter runs; sorting keeps the serialised output stable.
        for _key in sorted(getattr(self, '_additionalProperties', ())):
            _value = getattr(self, _key, None)
            if _value is not None:
                _dict[_key] = _value
        return _dict

    def _to_dict(self):
        """Return a json dictionary representing this model."""
        return self.to_dict()

    def __setattr__(self, name: str, value: object) -> None:
        """
        Set an attribute, recording any name other than `metadata` as an
        additional property so that `to_dict()` can emit it.
        """
        properties = {'metadata'}
        if not hasattr(self, '_additionalProperties'):
            # Go through the parent implementation so creating the tracking
            # set does not register itself as an additional property.
            super(AnalyzedResult, self).__setattr__('_additionalProperties',
                                                    set())
        if name not in properties:
            self._additionalProperties.add(name)
        super(AnalyzedResult, self).__setattr__(name, value)

    def __str__(self) -> str:
        """Return a `str` version of this AnalyzedResult object."""
        return json.dumps(self._to_dict(), indent=2)

    def __eq__(self, other: 'AnalyzedResult') -> bool:
        """Return `true` when self and other are equal, false otherwise."""
        if not isinstance(other, self.__class__):
            return False
        return self.__dict__ == other.__dict__

    def __ne__(self, other: 'AnalyzedResult') -> bool:
        """Return `true` when self and other are not equal, false otherwise."""
        return not self == other
1910
+
1911
+
1670
1912
class Collection ():
1671
1913
"""
1672
1914
A collection for storing documents.
0 commit comments