Skip to content

Commit

Permalink
add ability to update digital file metadata in preservica
Browse files Browse the repository at this point in the history
  • Loading branch information
[email protected] committed Mar 5, 2019
1 parent 40b7934 commit ec06ec3
Show file tree
Hide file tree
Showing 10 changed files with 463 additions and 6 deletions.
286 changes: 280 additions & 6 deletions CSV2Metadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,34 @@
*/



import org.apache.commons.cli.*;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpStatus;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.conn.HttpClientConnectionManager;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.BasicHttpClientConnectionManager;
import org.apache.http.util.EntityUtils;
import org.w3c.dom.*;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.nio.charset.Charset;
import java.util.Base64;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Properties;

/**
* Class to read a CSV file with headers and create an XML file
Expand All @@ -35,6 +52,26 @@
*
*/
public class CSV2Metadata {

// Logger obtained for the runtime class of this instance.
private Log log = LogFactory.getLog(getClass());


// Connection manager handed to the HTTP client that getClient() builds.
private HttpClientConnectionManager cm = new BasicHttpClientConnectionManager();
// HTTP client used for the REST calls; created on first use by getClient().
private CloseableHttpClient httpclient;

// Factory for the DOM builders; switched to namespace-aware in the constructor.
private javax.xml.parsers.DocumentBuilderFactory factory = javax.xml.parsers.DocumentBuilderFactory.newInstance();

// Factory for the transformer that serialises the XIP document in updateEntity().
private TransformerFactory transformerFactory = TransformerFactory.newInstance();

// Namespace in which hasDublinCore() looks up "Metadata" elements.
private static final String XIP_NS = "http://www.tessella.com/XIP/v4";

// Settings supplied through the constructor; the preservica.domain,
// preservica.username and preservica.password keys are read from it.
private Properties userDetails;

/**
 * Create a converter that uses the given Preservica settings.
 *
 * The shared DOM builder factory is made namespace aware here so that
 * every document built later can be queried by namespace.
 *
 * @param userDetails properties holding the Preservica settings
 */
public CSV2Metadata(Properties userDetails) {
    this.userDetails = userDetails;
    this.factory.setNamespaceAware(true);
}

/**
* The java Main entry point for executing the class
*
Expand All @@ -51,6 +88,7 @@ public static void main(String[] args) {
options.addOption( "r", "root", true, "the root element of the dublin core xml, defaults to dc" );
options.addOption( "n", "namespace", true, "the root element namespace, defaults to http://purl.org/dc/elements/1.1/" );
options.addOption( "p", "prefix", true, "the root element namespace prefix, defaults to dc" );
options.addOption( "u", "user", true, "the property file with Preservica username & password" );
options.addOption( "h", "help", false, "print this message" );

HelpFormatter formatter = new HelpFormatter();
Expand All @@ -68,6 +106,8 @@ public static void main(String[] args) {
File inputFile = null;
File outputDir = null;

Properties userDetails = new Properties();

try {
// parse the command line arguments
CommandLine line = parser.parse( options, args );
Expand All @@ -83,6 +123,11 @@ public static void main(String[] args) {
rootElement = DEFAULT_ROOT_ELEMENT;
}

if ( line.hasOption( "u" ) ) {
String properties = line.getOptionValue( "u" );
userDetails.load(new FileInputStream(properties));
}

if ( line.hasOption( "p" ) ) {
rootPrefix = line.getOptionValue( "p" );
} else {
Expand Down Expand Up @@ -116,6 +161,7 @@ public static void main(String[] args) {
if ( line.hasOption( "o" ) ) {
String outputFolder = line.getOptionValue( "o" );
outputDir = new File(outputFolder);
outputDir.mkdirs();
if ( (!outputDir.exists()) || (!outputDir.isDirectory()) ) {
System.out.println(String.format("The output directory %s does not exist", outputFolder));
System.exit(1);
Expand All @@ -125,8 +171,10 @@ public static void main(String[] args) {
System.exit(1);
}



try {
CSV2Metadata metadata = new CSV2Metadata();
CSV2Metadata metadata = new CSV2Metadata(userDetails);
int files = metadata.parse(inputFile, outputDir, fileColumn, rootElement, rootPrefix, rootNamespace);
System.out.println(String.format("Created %d XML files in %s", files, outputDir.getName()));
} catch (Exception e) {
Expand All @@ -138,6 +186,10 @@ public static void main(String[] args) {
catch( ParseException exp ) {
System.out.println(exp.getMessage());
formatter.printHelp( cmdLine, options );
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}

Expand Down Expand Up @@ -219,6 +271,8 @@ private int parse(File csvDocument, File folder, String filenameColumn, String r
osw.write(">");
osw.write(System.getProperty("line.separator"));

String filerefId = null;

for (int i = 0; i < headerCount; i++) {
String header = headers[i];
boolean isDublinCore = (header.startsWith("dc:") || header.startsWith("dcterms:"));
Expand All @@ -232,15 +286,235 @@ private int parse(File csvDocument, File folder, String filenameColumn, String r
}
osw.write(System.getProperty("line.separator"));
}
if (header.toLowerCase().trim().startsWith("fileref")) {
filerefId = record.get(i).trim();
}
}
osw.write(String.format("</%s:%s>", rootPrefix, rootElement));
osw.flush();
osw.close();
fos.close();

numFiles++;

// if the entity does not have descriptive metadata with the required
// namespace then add it.

if (filerefId != null && (filerefId.length() > 0) ) {
if ((userDetails != null) && (!userDetails.isEmpty())) {
Document xipDocument = getEntity(filerefId);
if (xipDocument != null) {
if (!hasDublinCore(xipDocument, rootNamespace)) {
org.w3c.dom.Document dublinCoreDocument = getDocumentFromFile(xmlFile);
xipDocument = addDublinCore(dublinCoreDocument, xipDocument, rootNamespace);
updateEntity(xipDocument, filerefId);
} else {
System.out.println("Entity: " + filerefId + " already has Dublin Core metadata. Ignoring....");
}
} else {
System.out.println("Failed to find a Preservica entity with ID: " + filerefId);
}
} else {
System.out.println("Create a preservica.properties file with username and password");
System.out.println("to update entries");
}
}
}

return numFiles;
}

/**
 * Update the Preservica file entity with the Dublin Core metadata.
 *
 * The XIP document is serialised to a string and sent with an HTTP PUT to
 * the digitalFiles endpoint of the configured Preservica domain. A response
 * other than 200 is logged as an error but is not thrown.
 *
 * @param document the XIP document to send as the request body
 * @param entityRef the reference of the entity to update
 * @throws RuntimeException wrapping any failure while serialising the
 *         document or executing the request
 */
private void updateEntity(Document document, String entityRef) {
    CloseableHttpClient client = getClient();
    CloseableHttpResponse response = null;
    try {
        String domain = userDetails.getProperty("preservica.domain");

        HttpPut putRequest = new HttpPut(String.format("https://%s/api/entity/digitalFiles/%s", domain, entityRef.trim()));
        putRequest.setHeader("Authorization", getHeader());

        document.normalize();

        // Serialise the DOM tree to text for the request body.
        StringWriter writer = new StringWriter();
        Transformer transformer = transformerFactory.newTransformer();
        transformer.transform(new DOMSource(document), new StreamResult(writer));

        putRequest.setEntity(new StringEntity(writer.toString(), "UTF-8"));
        response = client.execute(putRequest);
        if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
            log.info("Updated object: " + entityRef);
        } else {
            log.error("Failed to update entity");
            log.error(response.getStatusLine().toString());
        }
    } catch (Exception ex) {
        // Pass the throwable as well so the stack trace reaches the log.
        log.error(ex.getMessage(), ex);
        throw new RuntimeException(ex);
    } finally {
        // response is still null when the request never completed; guard so
        // the clean-up cannot raise a NullPointerException that would
        // replace the original failure.
        if (response != null) {
            EntityUtils.consumeQuietly(response.getEntity());
            IOUtils.closeQuietly(response);
        }
    }
}

/**
 * Add Dublin Core metadata to an existing file entity.
 *
 * A new "Metadata" element is created in the XIP document, its schemaURI
 * attribute is set to the given namespace, and a deep copy of the Dublin
 * Core root element is appended to it. The new element is inserted directly
 * after the "Directory" element, but only when the document contains
 * exactly one such element; otherwise the document is returned unchanged
 * and a warning is logged.
 *
 * @param dublinCore the document whose root element is copied
 * @param xipDocument the XIP document to extend
 * @param namespace the value written to the schemaURI attribute
 * @return the same xipDocument instance that was passed in
 */
private Document addDublinCore(Document dublinCore, Document xipDocument, String namespace) {

    // Create a new Metadata element
    Element metadataElement = xipDocument.createElement("Metadata");
    metadataElement.setAttribute("schemaURI", namespace);

    // add the dublin core to it.
    Node dublinCoreNode = xipDocument.importNode(dublinCore.getDocumentElement(), true);
    metadataElement.appendChild(dublinCoreNode);

    // metadata goes after the "Directory" element;
    NodeList elements = xipDocument.getDocumentElement().getElementsByTagName("Directory");
    if (elements.getLength() == 1) {
        Element elem = (Element) elements.item(0);
        // insertBefore with a null reference node appends at the end, so
        // this also works when Directory is the last child.
        elem.getParentNode().insertBefore(metadataElement, elem.getNextSibling());
    } else {
        // Without this the caller would send an unchanged document and
        // nothing would indicate that the metadata was left out.
        log.warn("Expected exactly one Directory element but found " + elements.getLength()
                + "; Dublin Core metadata was not added");
    }

    return xipDocument;
}


/**
 * Create an org.w3c.dom.Document from the Dublin Core metadata file.
 *
 * Failures are logged rather than thrown.
 *
 * @param xmlFile the XML file to parse
 * @return the parsed document, or null when the file could not be parsed
 */
private Document getDocumentFromFile(File xmlFile) {
    org.w3c.dom.Document document = null;
    try {
        javax.xml.parsers.DocumentBuilder builder = factory.newDocumentBuilder();
        document = builder.parse(xmlFile);
    } catch (Exception ex) {
        // Log the file and pass the throwable separately so the stack trace
        // is recorded instead of only the exception's toString().
        log.error("Failed to parse XML file: " + xmlFile, ex);
    }
    return document;
}


/**
 * Check whether the document already carries a "Metadata" element in the
 * XIP namespace whose schemaURI attribute equals the given namespace.
 * This makes it safe to re-run the program.
 *
 * @param document the document to inspect
 * @param namespace the schemaURI value to look for
 * @return true when a matching Metadata element exists, false otherwise
 */
private boolean hasDublinCore(Document document, String namespace) {
    NodeList metadataNodes = document.getElementsByTagNameNS(XIP_NS, "Metadata");
    int count = metadataNodes.getLength();
    for (int idx = 0; idx < count; idx++) {
        NamedNodeMap attrs = metadataNodes.item(idx).getAttributes();
        if (attrs == null) {
            continue;
        }
        Node schemaUri = attrs.getNamedItem("schemaURI");
        if (schemaUri != null && schemaUri.getNodeValue().equals(namespace)) {
            return true;
        }
    }
    return false;
}

/**
 * Build the value of the HTTP Basic "Authorization" header from the
 * preservica.username and preservica.password properties.
 *
 * @return the header value in the form "Basic " followed by the Base64
 *         encoding of "username:password"
 */
private String getHeader() {
    Charset utf8 = Charset.forName("UTF-8");
    String credentials = String.format("%s:%s", userDetails.getProperty("preservica.username"), userDetails.getProperty("preservica.password"));
    // Encode with an explicit charset: the no-argument getBytes() uses the
    // platform default, which makes non-ASCII credentials machine dependent.
    return String.format("Basic %s", Base64.getEncoder().encodeToString(credentials.getBytes(utf8)));
}

/**
 * Get a Preservica entity by its reference.
 *
 * Sends an HTTP GET to the entities endpoint of the configured Preservica
 * domain and parses the response body when the status is 200.
 *
 * @param entityRef the reference of the entity to fetch
 * @return the XIP XML as an org.w3c.dom Document, or null when the server
 *         answered with a status other than 200
 * @throws RuntimeException wrapping any failure while executing the request
 *         or parsing the response
 */
private Document getEntity(String entityRef) {

    String domain = userDetails.getProperty("preservica.domain");

    CloseableHttpClient client = getClient();
    CloseableHttpResponse response = null;
    try {
        HttpGet httpGet = new HttpGet(String.format("https://%s/api/entity/entities/%s", domain, entityRef.trim()));
        httpGet.setHeader("Authorization", getHeader());
        response = client.execute(httpGet);
        if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
            return getDocument(response);
        }
        log.error("Failed to get entity: " + entityRef);
        log.error(response.getStatusLine().toString());
        return null;
    } catch (Exception ex) {
        // Pass the throwable as well so the stack trace reaches the log.
        log.error(ex.getMessage(), ex);
        throw new RuntimeException(ex);
    } finally {
        // response is still null when the request never completed; guard so
        // the clean-up cannot raise a NullPointerException that would
        // replace the original failure.
        if (response != null) {
            EntityUtils.consumeQuietly(response.getEntity());
            IOUtils.closeQuietly(response);
        }
    }
}

/**
 * Return the HTTP client used for the REST calls, building it with the
 * shared connection manager the first time it is requested.
 *
 * @return the HTTP client instance held by this object
 */
private CloseableHttpClient getClient() {
    if (httpclient != null) {
        return httpclient;
    }
    httpclient = HttpClients.custom().setConnectionManager(cm).build();
    return httpclient;
}

/**
 * Create a document from an HTTP response.
 *
 * The response body is handed to the XML parser as a byte stream rather
 * than being decoded to text first, so the result does not depend on the
 * platform default charset. The response is consumed and closed before
 * this method returns, whether or not parsing succeeds.
 *
 * @param response the response whose body is parsed
 * @return the parsed document
 *
 * @throws Exception when the body cannot be read or parsed
 */
private Document getDocument(CloseableHttpResponse response) throws Exception {
    InputStream is = null;
    try {
        is = response.getEntity().getContent();
        javax.xml.parsers.DocumentBuilder builder = factory.newDocumentBuilder();
        return builder.parse(is);
    } finally {
        IOUtils.closeQuietly(is);
        EntityUtils.consumeQuietly(response.getEntity());
        IOUtils.closeQuietly(response);
    }
}

}
Loading

0 comments on commit ec06ec3

Please sign in to comment.