Skip to content

Commit 130bcc4

Browse files
committed
Supporting update of product-level metadata.
1 parent e392547 commit 130bcc4

File tree

7 files changed

+311
-9
lines changed

7 files changed

+311
-9
lines changed

common/src/main/java/gov/nasa/jpl/edrn/labcas/Constants.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ public interface Constants {
1515

1616
public final static String ENV_FILEMGR_URL = "FILEMGR_URL";
1717
public final static String ENV_WORKFLOW_URL = "WORKFLOW_URL";
18+
public final static String ENV_SOLR_URL = "SOLR_URL";
1819

1920
// workflows
2021
public final static String WORKFLOW_LABCAS_UPOLOAD = "labcas-upload";
@@ -25,6 +26,7 @@ public interface Constants {
2526
public final static String METADATA_KEY_DESCRIPTION = "Description";
2627
public final static String METADATA_KEY_FILE_LOCATION = "FileLocation";
2728
public final static String METADATA_KEY_FILE_NAME = "Filename";
29+
public final static String METADATA_KEY_PRODUCT_NAME = "ProductName";
2830

2931
// dataset-level metadata file
3032
public final static String METADATA_FILE = "DatasetMetadata.xmlmet";

common/src/main/java/gov/nasa/jpl/edrn/labcas/FileManagerUtils.java

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import java.io.IOException;
66
import java.net.URL;
77
import java.util.Arrays;
8+
import java.util.HashMap;
89
import java.util.List;
910
import java.util.logging.Logger;
1011

@@ -19,6 +20,7 @@
1920
import org.w3c.dom.Element;
2021

2122
import gov.nasa.jpl.edrn.labcas.extractors.XmlFileMetExtractor;
23+
import gov.nasa.jpl.edrn.labcas.utils.SolrUtils;
2224

2325
/**
2426
* Class that contains common functionality to interact with the FileManager.
@@ -65,7 +67,7 @@ public static String uploadDataset(String dataset) throws Exception {
6567
}
6668

6769
// create product type directory with the same name
68-
File datasetDir = FileManagerUtils.getDatasetDir(dataset);
70+
File datasetDir = FileManagerUtils.getDatasetArchiveDir(dataset);
6971
File policyDir = new File(datasetDir, "policy");
7072
if (!policyDir.exists()) {
7173
policyDir.mkdirs();
@@ -138,13 +140,64 @@ public static String updateDataset(String dataset) throws Exception {
138140
// write the updated product type object to XML
139141
final List<ProductType> productTypes = Arrays.asList( new ProductType[] { productType });
140142

141-
File productTypesXmlFile = new File(FileManagerUtils.getDatasetDir(dataset), "/policy/product-types.xml");
143+
File productTypesXmlFile = new File(FileManagerUtils.getDatasetArchiveDir(dataset), "/policy/product-types.xml");
142144
XmlStructFactory.writeProductTypeXmlDocument(productTypes, productTypesXmlFile.getAbsolutePath());
143145
LOG.info("Written update product type metadata to XML file: "+ productTypesXmlFile.getAbsolutePath());
144146

145147
return productTypeName;
146148
}
147149

150+
/**
151+
* Method to update the metadata content of all products belonging to a given dataset
152+
* (latest version only).
153+
* @param dataset
154+
* @throws Exception
155+
*/
156+
public static void updateProducts(String dataset) throws Exception {
157+
158+
// determine latest dataset version
159+
int version = findLatestDatasetVersion(dataset);
160+
161+
// loop over .xmlmet files in staging directory
162+
File stagingDir = getDatasetStagingDir(dataset);
163+
String[] xmlmetFiles = stagingDir.list(new FilenameFilter() {
164+
@Override
165+
public boolean accept(File current, String name) {
166+
return name.endsWith(Constants.EDRN_METADATA_EXTENSION);
167+
}
168+
});
169+
170+
171+
// loop over products with additional metadata
172+
HashMap<String, Metadata> updateMetadataMap = new HashMap<String, Metadata>();
173+
for (String xmlmetFile : xmlmetFiles) {
174+
if (!xmlmetFile.equals(Constants.METADATA_FILE)) {
175+
176+
// filename
177+
String filename = xmlmetFile.replace(Constants.EDRN_METADATA_EXTENSION, "");
178+
// read in product metadata
179+
Metadata met = readMetadata( new File(stagingDir, xmlmetFile) );
180+
// retrieve product id
181+
String id = SolrUtils.queryProduct(dataset, version, filename);
182+
LOG.info("Updating product name: "+filename+" id: "+id+" with metadata from file: "+xmlmetFile);
183+
184+
if (id!=null) {
185+
// populate the map with the metadata to update
186+
updateMetadataMap.put(id, met);
187+
}
188+
189+
}
190+
191+
}
192+
193+
// send all updates at once
194+
if (updateMetadataMap.size()>0) {
195+
String solrXmlDocument = SolrUtils.buildSolrXmlDocument(updateMetadataMap);
196+
SolrUtils.postSolrXml(solrXmlDocument);
197+
}
198+
199+
}
200+
148201
/**
149202
* Method to instruct the File Manager to reload its policy configuration.
150203
* @throws Exception
@@ -168,7 +221,7 @@ public static void reload() throws Exception {
168221
*/
169222
public static int findLatestDatasetVersion(final String datasetName) {
170223

171-
File datasetDir = FileManagerUtils.getDatasetDir(datasetName);
224+
File datasetDir = FileManagerUtils.getDatasetArchiveDir(datasetName);
172225

173226
int version = 0;
174227
if (datasetDir.exists()) {
@@ -255,14 +308,21 @@ public static Metadata readMetadata(final File metadataFilepath) {
255308
* @param datasetName
256309
* @return
257310
*/
258-
private static File getDatasetDir(final String datasetName) {
311+
private static File getDatasetArchiveDir(final String datasetName) {
259312

260313
String archiveDir = System.getenv(Constants.ENV_LABCAS_ARCHIVE) + "/" + Constants.WORKFLOW_LABCAS_UPOLOAD;
261314
File datasetDir = new File(archiveDir, datasetName);
262315
return datasetDir;
263316

264317
}
265318

319+
private static File getDatasetStagingDir(final String datasetName) {
320+
321+
return new File(System.getenv(Constants.ENV_LABCAS_STAGING), datasetName);
322+
323+
}
324+
325+
266326
/**
267327
* Constructs the product type name from a dataset identifier.
268328
* @param dataset

common/src/main/java/gov/nasa/jpl/edrn/labcas/tasks/LabcasUpdateTaskInstance.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ public void run(Metadata metadata, WorkflowTaskConfiguration config) throws Work
3131
// update dataset object in File Manager
3232
FileManagerUtils.updateDataset(dataset);
3333

34+
// update products metadata directly into Solr
35+
FileManagerUtils.updateProducts(dataset);
36+
3437
// reload the catalog configuration so that the new product type is available for publishing
3538
FileManagerUtils.reload();
3639

Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
package gov.nasa.jpl.edrn.labcas.utils;
2+
3+
import java.net.MalformedURLException;
4+
import java.util.ArrayList;
5+
import java.util.HashMap;
6+
import java.util.Iterator;
7+
import java.util.List;
8+
import java.util.logging.Logger;
9+
10+
import javax.xml.parsers.DocumentBuilder;
11+
import javax.xml.parsers.DocumentBuilderFactory;
12+
13+
import org.apache.http.HttpEntity;
14+
import org.apache.http.HttpResponse;
15+
import org.apache.http.client.HttpClient;
16+
import org.apache.http.client.methods.HttpPost;
17+
import org.apache.http.entity.ByteArrayEntity;
18+
import org.apache.http.impl.client.DefaultHttpClient;
19+
import org.apache.http.util.EntityUtils;
20+
import org.apache.oodt.cas.metadata.Metadata;
21+
import org.apache.solr.client.solrj.SolrQuery;
22+
import org.apache.solr.client.solrj.SolrServer;
23+
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
24+
import org.apache.solr.client.solrj.response.QueryResponse;
25+
import org.apache.solr.common.SolrDocument;
26+
import org.apache.solr.common.SolrDocumentList;
27+
import org.springframework.util.StringUtils;
28+
import org.w3c.dom.Document;
29+
import org.w3c.dom.Element;
30+
31+
import gov.nasa.jpl.edrn.labcas.Constants;
32+
import gov.nasa.jpl.edrn.labcas.XmlUtils;
33+
34+
/**
35+
* Class containing general utilities to query/update the Solr index behind an OODT File Manager.
36+
*
37+
* @author luca
38+
*
39+
*/
40+
public class SolrUtils {
41+
42+
private final static Logger LOG = Logger.getLogger(SolrUtils.class.getName());
43+
44+
// default value for SOLR URL
45+
private static String SOLR_URL = "http://localhost:8080/solr/oodt-fm";
46+
47+
// IMPORTANT: must re-use the same SolrServer instance across all requests to prevent memory leaks
48+
// see https://issues.apache.org/jira/browse/SOLR-861
49+
// this method instantiates the shared instance of SolrServer
50+
private static SolrServer solrServer = null;
51+
static {
52+
if (System.getenv(Constants.ENV_SOLR_URL)!=null) {
53+
SOLR_URL = System.getenv(Constants.ENV_SOLR_URL);
54+
try {
55+
solrServer = new CommonsHttpSolrServer( SOLR_URL );
56+
} catch(MalformedURLException e) {
57+
e.printStackTrace();
58+
LOG.warning(e.getMessage());
59+
}
60+
}
61+
}
62+
63+
/**
64+
* Utility method to query Solr for all products of a given dataset and version.
65+
*
66+
* @param datasetName
67+
* @param datasetVersion
68+
* @return
69+
*/
70+
public static List<String> queryAllProducts(String datasetName, int datasetVersion) {
71+
72+
List<String> ids = new ArrayList<String>();
73+
74+
// build Solr query
75+
SolrQuery request = new SolrQuery();
76+
request.setQuery("*:*");
77+
request.addFilterQuery("Dataset:"+datasetName,"Version:"+datasetVersion);
78+
request.setRows(Constants.MAX_SOLR_ROWS);
79+
80+
// execute Solr query
81+
try {
82+
83+
QueryResponse response = solrServer.query( request );
84+
SolrDocumentList docs = response.getResults();
85+
Iterator<SolrDocument> iter = docs.iterator();
86+
while (iter.hasNext()) {
87+
SolrDocument doc = iter.next();
88+
//LOG.fine(doc.toString());
89+
String id = (String) doc.getFieldValue("id");
90+
LOG.info("Retrieved Solr document id="+id);
91+
ids.add(id);
92+
}
93+
94+
} catch(Exception e) {
95+
e.printStackTrace();
96+
LOG.warning(e.getMessage()); // will return empty ids list
97+
}
98+
99+
return ids;
100+
101+
}
102+
103+
/**
104+
* Utility method to query Solr for a specific product belonging to a dataset and version.
105+
* @param datasetName
106+
* @param datasetVersion
107+
* @param productName
108+
* @return
109+
*/
110+
public static String queryProduct(String datasetName, int datasetVersion, String productName) {
111+
112+
String id = null;
113+
114+
// build Solr query
115+
SolrQuery request = new SolrQuery();
116+
request.setQuery("*:*");
117+
request.addFilterQuery(Constants.METADATA_KEY_DATASET+":"+datasetName,
118+
Constants.METADATA_KEY_VERSION+":"+datasetVersion,
119+
Constants.METADATA_KEY_PRODUCT_NAME+":"+productName);
120+
request.setRows(1);
121+
122+
// execute Solr query
123+
try {
124+
125+
QueryResponse response = solrServer.query( request );
126+
SolrDocumentList docs = response.getResults();
127+
Iterator<SolrDocument> iter = docs.iterator();
128+
while (iter.hasNext()) {
129+
SolrDocument doc = iter.next();
130+
//LOG.fine(doc.toString());
131+
id = (String) doc.getFieldValue("id");
132+
LOG.info("Retrieved Solr document id="+id);
133+
}
134+
135+
} catch(Exception e) {
136+
e.printStackTrace();
137+
LOG.warning(e.getMessage());
138+
}
139+
140+
return id;
141+
142+
}
143+
144+
/**
145+
* Utility method to build a Solr XML update document
146+
* for all given records and all metadata fields.
147+
* @param ids
148+
* @param datasetMetadata
149+
* @return
150+
*/
151+
public static String buildSolrXmlDocument(HashMap<String, Metadata> updateMetadataMap) throws Exception {
152+
153+
// create Solr/XML update document
154+
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
155+
DocumentBuilder builder = dbf.newDocumentBuilder();
156+
Document xmlDocument = builder.newDocument();
157+
158+
// <add>
159+
Element addElement = xmlDocument.createElement("add");
160+
xmlDocument.appendChild(addElement);
161+
162+
// loop over all records that must be updated
163+
for (String id : updateMetadataMap.keySet()) {
164+
165+
// <doc>
166+
Element docElement = xmlDocument.createElement("doc");
167+
addElement.appendChild(docElement);
168+
169+
// <field name="id">38b6e7e6-3a9b-4565-9d57-37e8104b4fde</field>
170+
Element fieldElement = xmlDocument.createElement("field");
171+
fieldElement.setAttribute("name", "id");
172+
fieldElement.insertBefore(xmlDocument.createTextNode(id), fieldElement.getLastChild());
173+
docElement.appendChild(fieldElement);
174+
175+
// to add one or more values:
176+
// <field name="Institution" update="set">Darthmouth</field>
177+
// <field name="Institution" update="set">Children Hospital</field>
178+
// to remove a key:
179+
// <field name="Institution" update="set" null="true"/>
180+
Metadata metadata = updateMetadataMap.get(id);
181+
for (String key : metadata.getAllKeys()) {
182+
for (String val : metadata.getAllMetadata(key)) {
183+
LOG.info("\t==> XML: Updating dataset metadata key=["+key+"] value=["+val+"]");
184+
185+
Element metFieldElement = xmlDocument.createElement("field");
186+
metFieldElement.setAttribute("name", key);
187+
metFieldElement.setAttribute("update", "set");
188+
if (StringUtils.hasText(val)) {
189+
// add this value to that key
190+
metFieldElement.insertBefore(xmlDocument.createTextNode(val), metFieldElement.getLastChild());
191+
} else {
192+
// remove all values for that key
193+
metFieldElement.setAttribute("null", "true");
194+
}
195+
docElement.appendChild(metFieldElement);
196+
197+
}
198+
}
199+
200+
} // loop over record ids
201+
202+
String xmlString = XmlUtils.xmlToString(xmlDocument);
203+
LOG.info(xmlString);
204+
return xmlString;
205+
206+
}
207+
208+
/**
209+
* Utility method to POST an XML document to Solr
210+
* @param solrXmlDocument
211+
*/
212+
public static void postSolrXml(String solrXmlDocument) {
213+
214+
//String strURL = "http://edrn-frontend.jpl.nasa.gov:8080/solr/oodt-fm/update?commit=true";
215+
String solrUpdateUrl = SOLR_URL + "/update?commit=true";
216+
217+
HttpClient client = new DefaultHttpClient();
218+
HttpPost post = new HttpPost(solrUpdateUrl);
219+
220+
try {
221+
HttpEntity entity = new ByteArrayEntity(solrXmlDocument.getBytes("UTF-8"));
222+
post.setEntity(entity);
223+
post.setHeader("Content-Type", "application/xml");
224+
HttpResponse response = client.execute(post);
225+
String result = EntityUtils.toString(response.getEntity());
226+
LOG.info("POST result="+result);
227+
228+
} catch(Exception e) {
229+
LOG.warning(e.getMessage());
230+
231+
} finally {
232+
// must release connection
233+
post.releaseConnection();
234+
}
235+
236+
}
237+
238+
}

common/src/main/scripts/env.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@ export CATALINA_HOME=$LABCAS_HOME/apache-tomcat
66
export SOLR_HOME=$LABCAS_HOME/solr-home
77
export SOLR_DATA_DIR=$LABCAS_HOME/solr-index
88
export CATALINA_OPTS='-Dsolr.solr.home=$SOLR_HOME -Dsolr.data.dir=$SOLR_DATA_DIR'
9+
export SOLR_URL=http://localhost:8080/solr/oodt-fm
910

1011
#export PGE_ROOT=$LABCAS_HOME/pges
1112

1213
export FILEMGR_URL=http://localhost:9000
1314
export WORKFLOW_URL=http://localhost:9001
1415

16+

0 commit comments

Comments
 (0)