From 023e7a208d1af1e79e05f42196d357bcc7ecff84 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 21 Jun 2024 11:44:47 -0700 Subject: [PATCH 01/21] Added the hashstore dependency. --- pom.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pom.xml b/pom.xml index db1546bc..da5f5fc5 100644 --- a/pom.xml +++ b/pom.xml @@ -288,6 +288,11 @@ jaxb-runtime 2.3.2 + + org.dataone + hashstore + 1.0-SNAPSHOT + From ab07a66c09eb64c66509bf48a77aff6b431bec8e Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 21 Jun 2024 19:07:04 -0700 Subject: [PATCH 02/21] Added the storage interface and implementation. --- .../dataone/indexer/storage/HashStorage.java | 86 +++++++++++++++++++ .../org/dataone/indexer/storage/Storage.java | 41 +++++++++ .../indexer/storage/StorageFactory.java | 28 ++++++ 3 files changed, 155 insertions(+) create mode 100644 src/main/java/org/dataone/indexer/storage/HashStorage.java create mode 100644 src/main/java/org/dataone/indexer/storage/Storage.java create mode 100644 src/main/java/org/dataone/indexer/storage/StorageFactory.java diff --git a/src/main/java/org/dataone/indexer/storage/HashStorage.java b/src/main/java/org/dataone/indexer/storage/HashStorage.java new file mode 100644 index 00000000..2bf632f0 --- /dev/null +++ b/src/main/java/org/dataone/indexer/storage/HashStorage.java @@ -0,0 +1,86 @@ +package org.dataone.indexer.storage; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.security.NoSuchAlgorithmException; +import java.util.Properties; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dataone.configuration.Settings; +import org.dataone.hashstore.HashStore; +import org.dataone.hashstore.HashStoreFactory; +import org.dataone.hashstore.exceptions.HashStoreFactoryException; + +/** + * The HashFileStore implementation of the Storage interface + */ +public class HashStorage implements Storage { + + private static Log logMetacat = 
LogFactory.getLog(HashStorage.class); + private static HashStorage hashStorage; + private HashStore hashStore; + + /** + * Private constructor + * @param className the name of the implementation class + * @throws IOException + * @throws HashStoreFactoryException + */ + private HashStorage(String className) throws HashStoreFactoryException, IOException { + String rootPath = Settings.getConfiguration().getString("storage.hashstore.rootDirectory"); + if (rootPath == null) { + throw new HashStoreFactoryException("HashStorage.constructor - The HashStore root path " + + " is null or blank from the property of storage.hashstore.rootDirectory"); + } + String directoryDepth = Settings.getConfiguration() + .getString("storage.hashstore.directory.depth", "3"); + String directoryNameWidth = Settings.getConfiguration() + .getString("storage.hashstore.directory.width", "2"); + String fileNameAlgorithm = Settings.getConfiguration() + .getString("storage.hashstore.fileNameAlgorithm", "SHA-256"); + String defaultNamespace = Settings.getConfiguration() + .getString("storage.hashstore.defaultNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootPath); + storeProperties.setProperty("storeDepth", directoryDepth); + storeProperties.setProperty("storeWidth", directoryNameWidth); + storeProperties.setProperty("storeAlgorithm", fileNameAlgorithm); + storeProperties.setProperty("storeMetadataNamespace", defaultNamespace); + hashStore = HashStoreFactory.getHashStore(className, storeProperties); + } + + /** + * Get the instance of the class through the singleton pattern + * @param className the name of the implementation class + * @return the instance of the class + * @throws IOException + */ + public static HashStorage getInstance(String className) throws IOException { + if(hashStorage == null) { + synchronized(HashStorage.class) { + if (hashStorage == null) { + hashStorage = 
new HashStorage(className); + } + } + } + return hashStorage; + } + + @Override + public InputStream retrieveObject(String pid) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException { + return hashStorage.retrieveObject(pid); + } + + @Override + public InputStream retrieveSystemMetadata(String pid) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException { + return hashStorage.retrieveSystemMetadata(pid); + } + +} diff --git a/src/main/java/org/dataone/indexer/storage/Storage.java b/src/main/java/org/dataone/indexer/storage/Storage.java new file mode 100644 index 00000000..e138d976 --- /dev/null +++ b/src/main/java/org/dataone/indexer/storage/Storage.java @@ -0,0 +1,41 @@ +package org.dataone.indexer.storage; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.security.NoSuchAlgorithmException; + + +/** + * The Storage represents the interface to access the objects and system metadata + */ +public interface Storage { + /** + * Returns an InputStream to an object from HashStore using a given persistent identifier. 
+ * + * @param pid Authority-based identifier + * @return Object InputStream + * @throws IllegalArgumentException When pid is null or empty + * @throws FileNotFoundException When requested pid has no associated object + * @throws IOException I/O error when creating InputStream to object + * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not + * supported + */ + public InputStream retrieveObject(String pid) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException; + + /** + * Returns an InputStream to the system metadata content of a given pid + * + * @param pid Authority-based identifier + * @return Metadata InputStream + * @throws IllegalArgumentException When pid/formatId is null or empty + * @throws FileNotFoundException When requested pid+formatId has no associated object + * @throws IOException I/O error when creating InputStream to metadata + * @throws NoSuchAlgorithmException When algorithm used to calculate metadata address is not + * supported + */ + public InputStream retrieveSystemMetadata(String pid) throws IllegalArgumentException, + FileNotFoundException, IOException, NoSuchAlgorithmException; + +} diff --git a/src/main/java/org/dataone/indexer/storage/StorageFactory.java b/src/main/java/org/dataone/indexer/storage/StorageFactory.java new file mode 100644 index 00000000..fcbca1f1 --- /dev/null +++ b/src/main/java/org/dataone/indexer/storage/StorageFactory.java @@ -0,0 +1,28 @@ +package org.dataone.indexer.storage; + +import java.io.IOException; + +import org.dataone.configuration.Settings; + +/** + * The factory class to create a Storage instance + */ +public class StorageFactory { + + /** + * Get the Storage implementation instance + * @return the Storage class instance + * @throws IOException + * @throws ServiceException + */ + public static Storage getStorage() throws IOException, IllegalArgumentException{ + String className = 
Settings.getConfiguration().getString("storage.className"); + if (className != null && className.startsWith("org.dataone.hashstore")) { + return HashStorage.getInstance(className); + } else { + throw new IllegalArgumentException("StorageFactory.getStorage - Unrecognized the " + + " storage class " + className + + ". So Indexer can't initialize the storage system."); + } + } +} From 3248f611cf473485c375d607f67d363a67566d88 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Mon, 24 Jun 2024 13:41:44 -0700 Subject: [PATCH 03/21] Removed the commented out code. Shortened the long statements. --- .../org/dataone/cn/indexer/SolrIndex.java | 355 ++++++++---------- 1 file changed, 147 insertions(+), 208 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/SolrIndex.java b/src/main/java/org/dataone/cn/indexer/SolrIndex.java index 104a158a..cf8ace04 100644 --- a/src/main/java/org/dataone/cn/indexer/SolrIndex.java +++ b/src/main/java/org/dataone/cn/indexer/SolrIndex.java @@ -109,8 +109,10 @@ public class SolrIndex { * @throws SAXException * @throws IOException */ - public SolrIndex(XMLNamespaceConfig xmlNamespaceConfig, BaseXPathDocumentSubprocessor systemMetadataProcessor, HTTPService httpService) - throws XPathExpressionException, ParserConfigurationException, IOException, SAXException { + public SolrIndex(XMLNamespaceConfig xmlNamespaceConfig, + BaseXPathDocumentSubprocessor systemMetadataProcessor, HTTPService httpService) + throws XPathExpressionException, ParserConfigurationException, + IOException, SAXException { this.xmlNamespaceConfig = xmlNamespaceConfig; this.systemMetadataProcessor = systemMetadataProcessor; this.httpService = httpService; @@ -121,7 +123,8 @@ private void init() throws ParserConfigurationException, XPathExpressionExceptio sysmetaSolrFields = systemMetadataProcessor.getFieldList(); copyFields = httpService.getSolrCopyFields(); if (copyFields != null) { - log.info("SolrIndex.init - the size of the copy fields from the solr schema is : " + 
copyFields.size()); + log.info("SolrIndex.init - the size of the copy fields from the solr schema is : " + + copyFields.size()); for(String copyField : copyFields) { log.debug("SolrIndex.init - the copy field from the solr schema: " + copyField); } @@ -181,16 +184,18 @@ public void setDeleteSubprocessors( * @throws NotFound * @throws NotImplemented */ - private Map process(String id, SystemMetadata systemMetadata, String objectPath, boolean isSysmetaChangeOnly) - throws IOException, SAXException, ParserConfigurationException, - XPathExpressionException, MarshallingException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{ + private Map process(String id, SystemMetadata systemMetadata, + String objectPath, boolean isSysmetaChangeOnly) + throws IOException, SAXException, ParserConfigurationException, + XPathExpressionException, MarshallingException, EncoderException, + SolrServerException, NotImplemented, NotFound, UnsupportedType{ log.debug("SolrIndex.process - trying to generate the solr doc object for the pid "+id); long start = System.currentTimeMillis(); Map docs = new HashMap(); // Load the System Metadata document ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream); - ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); + ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); try { docs = systemMetadataProcessor.processDocument(id, docs, systemMetadataStream); } catch (Exception e) { @@ -198,7 +203,6 @@ private Map process(String id, SystemMetadata systemMetadata, S throw new SolrServerException(e.getMessage()); } long end = System.currentTimeMillis(); - //log.info("SolrIndex.process - the time for processing the system metadata for the pid " + id + " is " + (end-start) + "milliseconds."); // get the 
format id for this object String formatId = docs.get(id).getFirstFieldValue(SolrElementField.FIELD_OBJECTFORMAT); boolean skipOtherProcessor = false; @@ -207,12 +211,13 @@ private Map process(String id, SystemMetadata systemMetadata, S //we need to make the solr doc exists (means the resource map was processed SolrDoc existingResourceMapSolrDoc = httpService.getSolrDocumentById(solrQueryUri, id); if (existingResourceMapSolrDoc != null ) { - log.info("SolrIndex.process - This is a systemmetadata-change-only event for the resource map " + id + - ". So we only use the system metadata subprocessor"); + log.info("SolrIndex.process - This is a systemmetadata-change-only event for the " + + "resource map " + id + ". So we only use the system metadata subprocessor"); skipOtherProcessor = true; } else { - log.info("SolrIndex.process - There is no solr doc for the resource map " + id + - ". Even though this is a systemmetadata-change-only event, we can NOT just reindex the systemmeta only."); + log.info("SolrIndex.process - There is no solr doc for the resource map " + id + + ". Even though this is a systemmetadata-change-only event, we can NOT " + + "just reindex the systemmeta only."); } } @@ -234,20 +239,23 @@ private Map process(String id, SystemMetadata systemMetadata, S // metadata document. // note that resource map processing touches all objects // referenced by the resource map. 
- //start = System.currentTimeMillis(); FileInputStream dataStream = new FileInputStream(objectPath); - //end = System.currentTimeMillis(); - //log.info("SolrIndex.process - the time for reading the file input stream " + " for the pid " + id + " is " + (end-start) + "milliseconds."); if (!dataStream.getFD().valid()) { - log.error("SolrIndex.process - subprocessor "+ subprocessor.getClass().getName() +" couldn't process since it could not load OBJECT file for ID,Path=" + id + ", " - + objectPath); + log.error("SolrIndex.process - subprocessor " + + subprocessor.getClass().getName() + + " couldn't process since it could not load OBJECT file for ID,Path=" + + id + ", " + objectPath); //throw new Exception("Could not load OBJECT for ID " + id ); } else { start = System.currentTimeMillis(); docs = subprocessor.processDocument(id, docs, dataStream); end = System.currentTimeMillis(); - log.info("SolrIndex.process - the time for calling processDocument for the subprocessor " + subprocessor.getClass().getName() +" for the pid " + id + " is " + (end-start) + "milliseconds."); - log.debug("SolrIndex.process - subprocessor "+ subprocessor.getClass().getName() +" generated solr doc for id "+id); + log.info("SolrIndex.process - the time for calling processDocument " + + "for the subprocessor " + subprocessor.getClass().getName() + +" for the pid " + id + " is " + (end-start) + "milliseconds."); + log.debug("SolrIndex.process - subprocessor " + + subprocessor.getClass().getName() + +" generated solr doc for id "+id); } } catch (Exception e) { e.printStackTrace(); @@ -258,14 +266,6 @@ private Map process(String id, SystemMetadata systemMetadata, S } } } - - /*if(docs != null) { - SolrDoc solrDoc = docs.get(id); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - solrDoc.serialize(baos, "UTF-8"); - log.warn("after process the science metadata, the solr doc is \n"+baos.toString()); - }*/ - // TODO: in the XPathDocumentParser class in d1_cn_index_process module, // merge is 
only for resource map. We need more work here. for (SolrDoc mergeDoc : docs.values()) { @@ -273,15 +273,6 @@ private Map process(String id, SystemMetadata systemMetadata, S mergeWithIndexedDocument(mergeDoc); } } - - /*if(docs != null) { - SolrDoc solrDoc = docs.get(id); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - solrDoc.serialize(baos, "UTF-8"); - log.warn("after merge, the solr doc is \n"+baos.toString()); - }*/ - //SolrElementAdd addCommand = getAddCommand(new ArrayList(docs.values())); - return docs; } @@ -310,16 +301,11 @@ private Map process(String id, SystemMetadata systemMetadata, S // TODO:combine merge function with resourcemap merge function private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOException, - EncoderException, XPathExpressionException, SolrServerException, ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType { - //Retrieve the existing solr document from the solr server for the id. If it doesn't exist, null or empty solr doc will be returned. + EncoderException, XPathExpressionException, SolrServerException, + ParserConfigurationException, SAXException, NotImplemented, NotFound, UnsupportedType { + //Retrieve the existing solr document from the solr server for the id. If it doesn't exist, + //null or empty solr doc will be returned. 
SolrDoc indexedDocument = httpService.getSolrDocumentById(solrQueryUri, indexDocument.getIdentifier()); - /*int wait = new Double(Math.random() * 10000).intValue(); - System.out.println("++++++++++++++++++++++++++++ the wait time is " + wait); - try { - Thread.sleep(wait); - } catch (Exception e) { - - }*/ if (indexedDocument == null || indexedDocument.getFieldList().size() <= 0) { return indexDocument; } else { @@ -330,17 +316,25 @@ private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOExcepti .getName().equals(SolrElementField.FIELD_RESOURCEMAP)) && !indexDocument.hasFieldWithValue(field.getName(), field.getValue())) { indexDocument.addField(field); - } else if (!copyFields.contains(field.getName()) && !indexDocument.hasField(field.getName()) && !isSystemMetadataField(field.getName())) { + } else if (!copyFields.contains(field.getName()) + && !indexDocument.hasField(field.getName()) + && !isSystemMetadataField(field.getName())) { // we don't merge the system metadata field since they can be removed. // we don't merge the copyFields as well - log.debug("SolrIndex.mergeWithIndexedDocument - put the merge-needed existing solr field "+field.getName()+" with value "+field.getValue()+" from the solr server to a vector. We will merge it later."); - //indexDocument.addField(field); - mergeNeededFields.add(field);//record this name since we can have mutiple name/value for the same name. See https://projects.ecoinformatics.org/ecoinfo/issues/7168 + log.debug("SolrIndex.mergeWithIndexedDocument - put the merge-needed existing solr field " + + field.getName() + " with value " + field.getValue() + + " from the solr server to a vector. We will merge it later."); + //record this name since we can have mutiple name/value for the same name. 
+ //See https://projects.ecoinformatics.org/ecoinfo/issues/7168 + mergeNeededFields.add(field); } } if(mergeNeededFields != null) { for(SolrElementField field: mergeNeededFields) { - log.debug("SolrIndex.mergeWithIndexedDocument - merge the existing solr field "+field.getName()+" with value "+field.getValue()+" from the solr server to the currently processing document of "+indexDocument.getIdentifier()); + log.debug("SolrIndex.mergeWithIndexedDocument - merge the existing solr field " + + field.getName() + " with value " + field.getValue() + +" from the solr server to the currently processing document of " + + indexDocument.getIdentifier()); indexDocument.addField(field); } } @@ -348,7 +342,7 @@ private SolrDoc mergeWithIndexedDocument(SolrDoc indexDocument) throws IOExcepti return indexDocument; } } - + /* * If the given field name is a system metadata field. */ @@ -357,7 +351,9 @@ private boolean isSystemMetadataField(String fieldName) { if (fieldName != null && !fieldName.trim().equals("") && sysmetaSolrFields != null) { for(ISolrField field : sysmetaSolrFields) { if(field != null && field.getName() != null && field.getName().equals(fieldName)) { - log.debug("SolrIndex.isSystemMetadataField - the field name "+fieldName+" matches one record of system metadata field list. It is a system metadata field."); + log.debug("SolrIndex.isSystemMetadataField - the field name " + fieldName + + " matches one record of system metadata field list. 
It is a " + + "system metadata field."); is = true; break; } @@ -379,11 +375,9 @@ private void checkParams(Identifier pid, SystemMetadata systemMetadata, String o throw new InvalidRequest("0000", "The identifier of the indexed document should not be null or blank."); } if(systemMetadata == null) { - throw new InvalidRequest("0000", "The system metadata of the indexed document "+pid.getValue()+ " should not be null."); + throw new InvalidRequest("0000", "The system metadata of the indexed document " + + pid.getValue() + " should not be null."); } - /*if(objectPath == null) { - throw new SolrServerException("The indexed document itself for pid "+pid.getValue()+" should not be null."); - }*/ } /** @@ -391,23 +385,26 @@ private void checkParams(Identifier pid, SystemMetadata systemMetadata, String o * @param pid the id of this document * @param systemMetadata the system metadata associated with the data object * @param data the path to the object file itself - * @throws SolrServerException - * @throws MarshallingException - * @throws EncoderException - * @throws UnsupportedType - * @throws NotFound - * @throws NotImplemented + * @throws SolrServerException + * @throws MarshallingException + * @throws EncoderException + * @throws UnsupportedType + * @throws NotFound + * @throws NotImplemented * @throws InvalidRequest */ - private void insert(Identifier pid, SystemMetadata systemMetadata, String objectPath, boolean isSysmetaChangeOnly) + private void insert(Identifier pid, SystemMetadata systemMetadata, + String objectPath, boolean isSysmetaChangeOnly) throws IOException, SAXException, ParserConfigurationException, InvalidRequest, - XPathExpressionException, SolrServerException, MarshallingException, EncoderException, NotImplemented, NotFound, UnsupportedType { + XPathExpressionException, SolrServerException, MarshallingException, + EncoderException, NotImplemented, NotFound, UnsupportedType { checkParams(pid, systemMetadata, objectPath); log.debug("SolrIndex.insert - 
trying to insert the solrDoc for object "+pid.getValue()); long start = System.currentTimeMillis(); Map docs = process(pid.getValue(), systemMetadata, objectPath, isSysmetaChangeOnly); long end = System.currentTimeMillis(); - log.info("SolrIndex.insert - the subprocessor processing time of " + pid.getValue() + " is " + (end-start) + " milliseconds."); + log.info("SolrIndex.insert - the subprocessor processing time of " + pid.getValue() + " is " + + (end-start) + " milliseconds."); //transform the Map to the SolrInputDocument which can be used by the solr server if(docs != null) { start = System.currentTimeMillis(); @@ -416,18 +413,22 @@ private void insert(Identifier pid, SystemMetadata systemMetadata, String object if(id != null) { SolrDoc doc = docs.get(id); insertToIndex(doc); - log.debug("SolrIndex.insert - inserted the solr-doc object of pid "+id+", which relates to object "+pid.getValue()+", into the solr server."); + log.debug("SolrIndex.insert - inserted the solr-doc object of pid " + id + + ", which relates to object " + pid.getValue() + + ", into the solr server."); } } end = System.currentTimeMillis(); - log.info("SolrIndex.insert - finished to insert the solrDoc to the solr server for object " + pid.getValue() + - " and it took " + (end-start) + " milliseconds."); + log.info("SolrIndex.insert - finished to insert the solrDoc to the solr server for " + + " object " + pid.getValue() + " and it took " + (end-start) + + " milliseconds."); } else { - log.debug("SolrIndex.insert - the genered solrDoc is null. So we will not index the object "+pid.getValue()); + log.debug("SolrIndex.insert - the genered solrDoc is null. So we will not index the " + + "object "+pid.getValue()); } } - + /* * Insert a SolrDoc to the solr server. 
*/ @@ -438,38 +439,6 @@ private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException httpService.sendUpdate(solrIndexUri, addCommand, "UTF-8"); } - - /*private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException { - if(doc != null ) { - SolrInputDocument solrDoc = new SolrInputDocument(); - List list = doc.getFieldList(); - if(list != null) { - //solrDoc.addField(METACATPIDFIELD, pid); - Iterator iterator = list.iterator(); - while (iterator.hasNext()) { - SolrElementField field = iterator.next(); - if(field != null) { - String value = field.getValue(); - String name = field.getName(); - log.trace("SolrIndex.insertToIndex - add name/value pair - "+name+"/"+value); - solrDoc.addField(name, value); - } - } - } - if(!solrDoc.isEmpty()) { - try { - UpdateResponse response = solrServer.add(solrDoc); - solrServer.commit(); - } catch (SolrServerException e) { - throw e; - } catch (IOException e) { - throw e; - } - //System.out.println("=================the response is:\n"+response.toString()); - } - } - }*/ - /** * Update the solr index. This method handles the three scenarios: * 1. 
Remove an existing doc - if the the system metadata shows the value of the archive is true, @@ -494,11 +463,14 @@ private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException * @throws IllegalAccessException * @throws InstantiationException */ - public void update(Identifier pid, String relativePath, boolean isSysmetaChangeOnly) throws InvalidToken, NotAuthorized, - NotImplemented, ServiceFailure, NotFound, XPathExpressionException, UnsupportedType, - SAXException, ParserConfigurationException, SolrServerException, MarshallingException, - EncoderException, InterruptedException, IOException, InvalidRequest, InstantiationException, IllegalAccessException { - log.debug("SolrIndex.update - trying to update(insert or remove) solr index of object "+pid.getValue()); + public void update(Identifier pid, String relativePath, boolean isSysmetaChangeOnly) + throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound, + XPathExpressionException, UnsupportedType, SAXException, + ParserConfigurationException, SolrServerException, MarshallingException, + EncoderException, InterruptedException, IOException, InvalidRequest, + InstantiationException, IllegalAccessException { + log.debug("SolrIndex.update - trying to update(insert or remove) solr index of object " + + pid.getValue()); String objectPath = null; SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue(), relativePath); objectPath = ObjectManager.getInstance().getFilePath(relativePath, systemMetadata.getFormatId().getValue()); @@ -506,8 +478,9 @@ public void update(Identifier pid, String relativePath, boolean isSysmetaChangeO insert(pid, systemMetadata, objectPath, isSysmetaChangeOnly); } catch (SolrServerException e) { if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) { - log.info("SolrIndex.update - Indexer grabbed an older verion (version conflict) of the solr doc for object " + - pid.getValue() + ". 
It will try " + VERSION_CONFLICT_MAX_ATTEMPTS + " to fix the issues"); + log.info("SolrIndex.update - Indexer grabbed an older verion (version conflict) of " + + "the solr doc for object " + pid.getValue() + + ". It will try " + VERSION_CONFLICT_MAX_ATTEMPTS + " to fix the issues"); for (int i=0; i docsToUpdate = getUpdatedSolrDocsByRemovingResourceMap(pid); if (docsToUpdate != null && !docsToUpdate.isEmpty()) { - //SolrElementAdd addCommand = new SolrElementAdd(docsToUpdate); - //httpService.sendUpdate(solrIndexUri, addCommand); for(SolrDoc doc : docsToUpdate) { - //deleteDocFromIndex(doc.getIdentifier()); insertToIndex(doc); } } break; } catch (SolrServerException e) { if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) { - log.info("SolrIndex.removeDataPackage - Indexer grabbed an older verion (version conflict) of the solr doc for object" + - ". It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues"); + log.info("SolrIndex.removeDataPackage - Indexer grabbed an older verion " + + "(version conflict) of the solr doc for object" + + ". 
It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i ) + + " to fix the issues"); } else { throw e; } @@ -650,12 +631,13 @@ private void removeDataPackage(String pid) throws IOException, UnsupportedType, * Get the list of the solr doc which need to be updated because the removal of the resource map */ private List getUpdatedSolrDocsByRemovingResourceMap(String resourceMapId) - throws UnsupportedType, NotFound, SolrServerException, ParserConfigurationException, SAXException, MalformedURLException, IOException, XPathExpressionException, EncoderException { + throws UnsupportedType, NotFound, SolrServerException, ParserConfigurationException, + SAXException, MalformedURLException, IOException, XPathExpressionException, + EncoderException { List updatedSolrDocs = null; if (resourceMapId != null && !resourceMapId.trim().equals("")) { - /*List docsContainResourceMap = httpService.getDocumentsByResourceMap( - solrQueryUri, resourceMapId);*/ - List docsContainResourceMap = httpService.getDocumentsByResourceMap(solrQueryUri, resourceMapId); + List docsContainResourceMap = httpService + .getDocumentsByResourceMap(solrQueryUri, resourceMapId); updatedSolrDocs = removeResourceMapRelationship(docsContainResourceMap, resourceMapId); } @@ -740,15 +722,6 @@ private List removeAggregatedItems(String targetResourceMapId, SolrDoc doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP, targetResourceMapId); updatedSolrDocs.add(doc); - /*if (aggregatedItemsInDoc.size() > 1) { - - - } else { - //multiple resource map aggregate same metadata and data. Just remove the resource map - doc.removeFieldsWithValue(SolrElementField.FIELD_RESOURCEMAP, - targetResourceMapId); - updatedSolrDocs.add(doc); - }*/ } } return updatedSolrDocs; @@ -835,32 +808,43 @@ private List mergeUpdatedSolrDocs(ListremovedDocumentBy, List< SolrDoc docInRemovedDocs = removedDocuments.get(j); if(docInRemovedDocBy.getIdentifier().equals(docInRemovedDocs.getIdentifier())) { //find the same doc in both list. 
let's merge them. - //first get all the documents element from the docWithDocs(it has the correct information about the documents element) - List idsInDocuments = docInRemovedDocs.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS); - docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_DOCUMENTS);//clear out any documents element in docInRemovedDocBy + //first get all the documents element from the docWithDocs + //(it has the correct information about the documents element) + List idsInDocuments = docInRemovedDocs + .getAllFieldValues(SolrElementField.FIELD_DOCUMENTS); + //clear out any documents element in docInRemovedDocBy + docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_DOCUMENTS); //add the Documents element from the docInRemovedDocs if it has any. - // The docInRemovedDocs has the correct information about the documentBy. Now it copied the correct information of the documents element. - // So docInRemovedDocs has both correct information about the documentBy and documents elements. + // The docInRemovedDocs has the correct information about the documentBy. + // Now it copied the correct information of the documents element. + // So docInRemovedDocs has both correct information about the documentBy + //and documents elements. if(idsInDocuments != null) { for(String id : idsInDocuments) { if(id != null && !id.trim().equals("")) { - docInRemovedDocBy.addField(new SolrElementField(SolrElementField.FIELD_DOCUMENTS, id)); + docInRemovedDocBy.addField( + new SolrElementField(SolrElementField.FIELD_DOCUMENTS, id)); } } } //intersect the resource map ids. 
- List resourceMapIdsInWithDocs = docInRemovedDocs.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP); - List resourceMapIdsInWithDocBy = docInRemovedDocBy.getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP); + List resourceMapIdsInWithDocs = docInRemovedDocs + .getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP); + List resourceMapIdsInWithDocBy = docInRemovedDocBy + .getAllFieldValues(SolrElementField.FIELD_RESOURCEMAP); docInRemovedDocBy.removeAllFields(SolrElementField.FIELD_RESOURCEMAP); - Collection resourceMapIds = CollectionUtils.union(resourceMapIdsInWithDocs, resourceMapIdsInWithDocBy); + Collection resourceMapIds = CollectionUtils.union(resourceMapIdsInWithDocs, + resourceMapIdsInWithDocBy); if(resourceMapIds != null) { for(Object idObj : resourceMapIds) { String id = (String)idObj; - docInRemovedDocBy.addField(new SolrElementField(SolrElementField.FIELD_RESOURCEMAP, id)); + docInRemovedDocBy.addField(new SolrElementField( + SolrElementField.FIELD_RESOURCEMAP, id)); } } - //we don't need do anything about the documentBy elements since the docInRemovedDocBy has the correct information. + //we don't need do anything about the documentBy elements since the + //docInRemovedDocBy has the correct information. mergedDocuments.add(docInRemovedDocBy); //delete the two documents from the list removedDocumentBy.remove(i); @@ -870,8 +854,8 @@ private List mergeUpdatedSolrDocs(ListremovedDocumentBy, List< } } - // when we get there, if the two lists are empty, this will be a perfect merge. However, if something are left. we - //just put them in. + // when we get there, if the two lists are empty, this will be a perfect merge. + // However, if something are left. we just put them in. for(SolrDoc doc: removedDocumentBy) { mergedDocuments.add(doc); } @@ -886,7 +870,8 @@ private List mergeUpdatedSolrDocs(ListremovedDocumentBy, List< /* * Remove a pid which is part of resource map. 
*/ - private void removeFromDataPackage(String pid) throws XPathExpressionException, IOException, EncoderException, SolrServerException { + private void removeFromDataPackage(String pid) throws XPathExpressionException, IOException, + EncoderException, SolrServerException { SolrDoc indexedDoc = httpService.getSolrDocumentById(solrQueryUri, pid); deleteDocFromIndex(pid); List documents = indexedDoc.getAllFieldValues(SolrElementField.FIELD_DOCUMENTS); @@ -903,8 +888,10 @@ private void removeFromDataPackage(String pid) throws XPathExpressionException, break; } catch (SolrServerException e) { if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) { - log.info("SolrIndex.removeFromDataPackage - Indexer grabbed an older verion (version conflict) of the solr doc for object " + - documentsValue + ". It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues"); + log.info("SolrIndex.removeFromDataPackage - Indexer grabbed an older " + + "verion (version conflict) of the solr doc for object " + + documentsValue + ". It will try " + + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues"); } else { throw e; } @@ -927,8 +914,10 @@ private void removeFromDataPackage(String pid) throws XPathExpressionException, break; } catch (SolrServerException e) { if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) { - log.info("SolrIndex.removeFromDataPackage - Indexer grabbed an older verion (version conflict) of the solr doc for object " + - documentedByValue + ". It will try " + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues"); + log.info("SolrIndex.removeFromDataPackage - Indexer grabbed an older " + + "verion (version conflict) of the solr doc for object " + + documentedByValue + ". 
It will try " + + (VERSION_CONFLICT_MAX_ATTEMPTS - i )+ " to fix the issues"); } else { throw e; } @@ -938,68 +927,18 @@ private void removeFromDataPackage(String pid) throws XPathExpressionException, } } - /* - * Remove a pid from the solr index - */ - /*private synchronized void removeFromIndex(String identifier) throws Exception { - - - Map docs = new HashMap(); - - for (IDocumentDeleteSubprocessor deleteSubprocessor : deleteSubprocessors) { - docs.putAll(deleteSubprocessor.processDocForDelete(identifier, docs)); - } - List docsToUpdate = new ArrayList(); - List idsToIndex = new ArrayList(); - for (String idToUpdate : docs.keySet()) { - if (docs.get(idToUpdate) != null) { - docsToUpdate.add(docs.get(idToUpdate)); - } else { - idsToIndex.add(idToUpdate); - } - } - - // update the docs we have - for (SolrDoc docToUpdate : docsToUpdate) { - insertToIndex(docToUpdate); - } - - // delete this one - deleteDocFromIndex(identifier); - // index the rest - //TODO: we need to figure out how to get the file path - for (String idToIndex : idsToIndex) { - Identifier pid = new Identifier(); - pid.setValue(idToIndex); - SystemMetadata sysMeta = DistributedMapsFactory.getSystemMetadata(idToIndex); - if (SolrDoc.visibleInIndex(sysMeta)) { - String objectPath = DistributedMapsFactory.getObjectPathMap().get(pid); - boolean isSysmetaChangeOnlyEvent = false; - insert(pid, sysMeta, objectPath, isSysmetaChangeOnlyEvent); - } - } - - }*/ - private void deleteDocFromIndex(String pid) throws IOException { if (pid != null && !pid.trim().equals("")) { try { - //solrServer.deleteById(pid); - //solrServer.commit(); httpService.sendSolrDelete(pid, solrIndexUri); - //} catch (SolrServerException e) { - //throw e; - } catch (IOException e) { throw e; } } - } - /** * Set the http service * @param service @@ -1015,5 +954,5 @@ public void setHttpService(HTTPService service) { public HTTPService getHttpService() { return httpService; } - + } From ab0a72111e0ec33fff6ac3451c7bb5f356689c61 Mon Sep 17 
00:00:00 2001 From: Jing Tao Date: Mon, 24 Jun 2024 17:34:49 -0700 Subject: [PATCH 04/21] Add a method to get the input stream of system metadata for an identifier. --- .../cn/indexer/object/ObjectManager.java | 245 ++++++++---------- 1 file changed, 103 insertions(+), 142 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index f897903f..affe320d 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -20,12 +20,16 @@ */ package org.dataone.cn.indexer.object; +import java.io.ByteArrayInputStream; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.nio.file.FileSystems; import java.nio.file.Files; import org.apache.commons.io.FileUtils; +import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.log4j.Logger; import org.dataone.client.auth.AuthTokenSession; import org.dataone.client.exception.ClientSideException; @@ -37,6 +41,8 @@ import org.dataone.client.v2.impl.MultipartMNode; import org.dataone.configuration.Settings; import org.dataone.exceptions.MarshallingException; +import org.dataone.indexer.storage.Storage; +import org.dataone.indexer.storage.StorageFactory; import org.dataone.service.exceptions.InvalidToken; import org.dataone.service.exceptions.NotAuthorized; import org.dataone.service.exceptions.NotFound; @@ -56,65 +62,50 @@ */ public class ObjectManager { private static ObjectManager manager = null; - private static String dataRootDir = Settings.getConfiguration().getString("index.data.root.directory"); - private static String documentRootDir = Settings.getConfiguration().getString("index.document.root.directory"); private static String nodeBaseURL = Settings.getConfiguration().getString("dataone.mn.baseURL"); private static String DataONEauthToken = null; private 
static Logger logger = Logger.getLogger(ObjectManager.class); + private static Storage storage = null; private static final String TOKEN_VARIABLE_NAME = "DATAONE_AUTH_TOKEN"; private static final String TOKEN_FILE_PATH_PROP_NAME = "dataone.nodeToken.file"; private static final String SYSTEMMETA_FILE_NAME = "systemmetadata.xml"; private static MultipartD1Node d1Node = null; private static Session session = null; - private static boolean ifDataAndDocRootSame = false; - + + /** * Private constructor - * @throws ServiceFailure + * @throws ServiceFailure + * @throws IOException + * @throws IllegalArgumentException */ - private ObjectManager() throws ServiceFailure { - if (dataRootDir == null || dataRootDir.trim().equals("")) { - throw new ServiceFailure("0000", "The data root directory specified by the property index.data.root.directory is blank in the properties file"); - } - if (documentRootDir == null || documentRootDir.trim().equals("")) { - throw new ServiceFailure("0000", "The metadata root directory specified by the property index.document.root.directory is blank in the properties file"); - } - if (!Files.exists(FileSystems.getDefault().getPath(dataRootDir))) { - throw new ServiceFailure("0000", "The data root directory " + dataRootDir + - " specified in the properties file doesn't exist"); - } - if (!Files.exists(FileSystems.getDefault().getPath(documentRootDir))) { - throw new ServiceFailure("0000", "The document root directory " + documentRootDir + - " specified in the properties file doesn't exist"); - } - if (!dataRootDir.endsWith("/")) { - dataRootDir = dataRootDir + "/"; - } - if (!documentRootDir.endsWith("/")) { - documentRootDir = documentRootDir + "/"; - } - - if (documentRootDir.equals(dataRootDir)) { - ifDataAndDocRootSame = true; + private ObjectManager() throws ServiceFailure, IllegalArgumentException, IOException { + if (storage == null) { + if (storage == null) { + storage = StorageFactory.getStorage(); + } } - 
logger.info("ObjectManager.constructor - the root document directory is " + - documentRootDir + " and the root data directory is " + dataRootDir + - " Are they same?" + ifDataAndDocRootSame); if (d1Node == null) { - refreshD1Node(); + if (d1Node == null) { + refreshD1Node(); + } } else { - logger.info("ObjectManager ---NOT going to create the d1node with the url " + nodeBaseURL + - " since the ObjectManager already was assigned a d1node with the url " + d1Node.getNodeBaseServiceUrl()); + logger.info("ObjectManager ---NOT going to create the d1node with the url " + nodeBaseURL + + " since the ObjectManager already was assigned a d1node with the url " + + d1Node.getNodeBaseServiceUrl()); } } - + /** * Get an ObjectManager instance through the singleton pattern. * @return the instance of ObjectManager - * @throws ServiceFailure + * @throws ServiceFailure + * @throws IOException + * @throws IllegalArgumentException */ - public static ObjectManager getInstance() throws ServiceFailure { + public static ObjectManager getInstance() throws ServiceFailure, + IllegalArgumentException, IOException { if (manager == null) { synchronized (ObjectManager.class) { if (manager == null) { @@ -124,63 +115,37 @@ public static ObjectManager getInstance() throws ServiceFailure { } return manager; } - - /** - * Get the absolute file path for a given relative path. 
If the relativePath is null or blank, - * null will be returned - * @param relativePath - * @param objectFormat - * @return the absolute file path - * @throws NotFound - */ - public String getFilePath(String relativePath, String objectFormat) throws NotFound { - String absolutePath = null; - if (relativePath != null && !relativePath.trim().equals("")) { - if (ifDataAndDocRootSame) { - absolutePath = documentRootDir + relativePath; - } else if (objectFormat != null && !objectFormat.trim().equals("")) { - ObjectFormat format =ObjectFormatCache.getInstance().getFormat(objectFormat); - if (format.getFormatType().equals("METADATA")) { - absolutePath = documentRootDir + relativePath; - } else { - absolutePath = dataRootDir + relativePath; - } - } - } - logger.debug("ObjectManager.getFilePath - the absolute file path for the relative file path " + - relativePath + " is " + absolutePath); - return absolutePath; - } - + /** * Get the system metadata for the given id * @param id the id to identify the system metadata - * @param objectRelativePath the object path for this id. It can help to determine - * the system metadata file if the system metadata file exists. - * @return the system metadata associated with the id + * @return the input stream of the system metadata associated with the id. It may be null. 
* @throws InvalidToken * @throws NotAuthorized * @throws NotImplemented * @throws ServiceFailure * @throws NotFound - * @throws MarshallingException - * @throws IOException - * @throws IllegalAccessException - * @throws InstantiationException + * @throws MarshallingException + * @throws IOException + * @throws IllegalAccessException + * @throws InstantiationException */ - public SystemMetadata getSystemMetadata(String id, String relativeObjPath) throws InvalidToken, NotAuthorized, NotImplemented, - ServiceFailure, NotFound, InstantiationException, IllegalAccessException, IOException, MarshallingException { - SystemMetadata sysmeta = null; + public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAuthorized, + NotImplemented, ServiceFailure, NotFound, InstantiationException, + IllegalAccessException, IOException, MarshallingException { long start = System.currentTimeMillis(); - //try to get the system metadata from the file system first - File sysmetaFile = getSysmetaFile(relativeObjPath); - if (sysmetaFile != null) { - sysmeta = TypeMarshaller.unmarshalTypeFromFile(SystemMetadata.class, sysmetaFile); + //try to get the system metadata from the storage system first + InputStream sysmetaInputStream = null; + try { + sysmetaInputStream = storage.retrieveSystemMetadata(id); long end = System.currentTimeMillis(); - logger.info("ObjectManager.getSystemMetadata - finish getting the system metadata via the file system for the pid " + id + - " and it took " + (end - start) + "milliseconds"); - } else { - //if we can't get it from the file system, get it from dataone API + logger.info("ObjectManager.getSystemMetadata - finish getting the system metadata via " + + "the file system for the pid " + id + + " and it took " + (end - start) + "milliseconds"); + } catch (FileNotFoundException exception ) { + // Metacat can't find the system metadata from the storage system. 
+ // So try to get it from the dataone api + SystemMetadata sysmeta = null; Identifier identifier = new Identifier(); identifier.setValue(id); try { @@ -200,20 +165,67 @@ public SystemMetadata getSystemMetadata(String id, String relativeObjPath) throw continue; } } - logger.debug("ObjectManager.getSystemMetadata - finish getting the system metadata via the DataONE API call for the pid " + id); + logger.debug("ObjectManager.getSystemMetadata - finish getting the system metadata " + + "via the DataONE API call for the pid " + id); } catch (NotAuthorized e) { - logger.info("ObjectManager.getSystemMetadata - failed to get the system metadata via the DataONE API call for the pid " + id + - " since it is not authorized. We will refresh the token and try again"); + logger.info("ObjectManager.getSystemMetadata - failed to get the system metadata " + + "via the DataONE API call for the pid " + id + + " since it is not authorized. We will refresh the token and try again"); refreshD1Node(); sysmeta = d1Node.getSystemMetadata(session, identifier); } + if (sysmeta != null) { + ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); + TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); + sysmetaInputStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); + } long end = System.currentTimeMillis(); - logger.info("ObjectManager.getSystemMetadata - finish getting the system metadata via DataONE API for the pid " + id + - " and it took " + (end - start) + "milliseconds"); + logger.info("ObjectManager.getSystemMetadata - finish getting the system metadata via " + + "DataONE API for the pid " + id + " and it took " + + (end - start) + "milliseconds"); + } + return sysmetaInputStream; + } + + /** + * Get the system metadata object for the given identifier + * @param id the id to identify the system metadata + * @return the system metadata object associated with the id. It may be null. 
+ * @throws InvalidToken + * @throws NotAuthorized + * @throws NotImplemented + * @throws ServiceFailure + * @throws NotFound + * @throws InstantiationException + * @throws IllegalAccessException + * @throws IOException + * @throws MarshallingException + */ + public org.dataone.service.types.v1.SystemMetadata getSystemMetadata(String id) + throws InvalidToken, NotAuthorized, + NotImplemented, ServiceFailure, NotFound, + InstantiationException, IllegalAccessException, + IOException, MarshallingException { + org.dataone.service.types.v1.SystemMetadata sysmeta = null; + try (InputStream input = getSystemMetadataStream(id)) { + if (input != null) { + try { + SystemMetadata sysmeta2 = TypeMarshaller + .unmarshalTypeFromStream(SystemMetadata.class, input); + sysmeta = sysmeta2; + } catch (Exception e) { + try (InputStream input2 = getSystemMetadataStream(id)) { + if (input2 != null) { + sysmeta = TypeMarshaller.unmarshalTypeFromStream( + org.dataone.service.types.v1.SystemMetadata.class, input2); + } + } + } + } } return sysmeta; } - + /** * Set the d1 node for this object manager. * We only use it for testing @@ -222,58 +234,7 @@ public SystemMetadata getSystemMetadata(String id, String relativeObjPath) throw public static void setD1Node(MultipartD1Node node) { d1Node = node; } - - /** - * Get the system metadata file path from the objectPath. - * We assume the object and system metadata file are in the same directory. - * The system metadata file has a fixed name - systemmetadata.xml - * @param relativeObjPath the relative path of the object - * @return the file of system metadata. If it is null, this means the system metadata file does not exist. 
- */ - protected static File getSysmetaFile(String relativeObjPath) { - File sysmetaFile = null; - String sysmetaPath = null; - String relativeSysmetaPath = null; - if (relativeObjPath != null) { - if (relativeObjPath.contains(File.separator)) { - logger.debug("ObjectManager.getSysmetaFile - the object file path " + relativeObjPath + " has at least one path separator " + File.pathSeparator); - relativeSysmetaPath = relativeObjPath.substring(0, relativeObjPath.lastIndexOf(File.separator) + 1) + SYSTEMMETA_FILE_NAME; - } else { - logger.debug("ObjectManager.getSysmetaFile - the object file path " + relativeObjPath + " doesnot have any path separator " + File.pathSeparator); - //There is not path information in the object path ( it only has the file name). So we just simply return systemmetadata.xml - relativeSysmetaPath = SYSTEMMETA_FILE_NAME; - } - logger.debug("ObjectManager.getSysmetaFile - the relative system metadata file path for the object path " + - relativeObjPath + " is " + relativeSysmetaPath); - if (ifDataAndDocRootSame) { - sysmetaPath = documentRootDir + relativeSysmetaPath; - sysmetaFile = new File(sysmetaPath); - if (!sysmetaFile.exists()) { - //the system metadata file doesn't exist and we set it to null - sysmetaPath = null; - sysmetaFile = null; - } - } else { - //try if this object is a document first since we have no idea if the object is metadata or data. - sysmetaPath = documentRootDir + relativeSysmetaPath; - sysmetaFile = new File(sysmetaPath); - if (!sysmetaFile.exists()) { - // try data - sysmetaPath = dataRootDir + relativeSysmetaPath; - sysmetaFile = new File(sysmetaPath); - if (!sysmetaFile.exists()) { - //the system metadata file doesn't exist and we set it to null - sysmetaPath = null; - sysmetaFile = null; - } - } - } - } - logger.debug("ObjectManager.getSysmetaFile - the final system metadata file path for the object path " + - relativeObjPath + " is " + sysmetaPath + ". 
Null means that not system metadata file exists."); - return sysmetaFile; - } - + /** * In case the token expired, the method will retrieve the token and create a new d1 node * @throws ServiceFailure From 847b986d254f1437187848c1348a1ec149032bfb Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 25 Jun 2024 13:34:26 -0700 Subject: [PATCH 05/21] Change the call to get the system metadata. --- .../resourcemap/ForesiteResourceMap.java | 112 ++++++++---------- 1 file changed, 49 insertions(+), 63 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java b/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java index 55aa8a4f..5cca6949 100644 --- a/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java +++ b/src/main/java/org/dataone/cn/indexer/resourcemap/ForesiteResourceMap.java @@ -1,25 +1,3 @@ -/** - * This work was created by participants in the DataONE project, and is - * jointly copyrighted by participating institutions in DataONE. For - * more information on DataONE, see our web site at http://dataone.org. - * - * Copyright ${year} - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * $Id$ - */ - package org.dataone.cn.indexer.resourcemap; import java.io.ByteArrayOutputStream; @@ -218,60 +196,65 @@ private void _init(InputStream is) throws OREException, URISyntaxException, public static boolean representsResourceMap(String formatId) { return RESOURCE_MAP_FORMAT.equals(formatId); } - + private boolean isHeadVersion(Identifier pid, Identifier sid) { boolean isHead = true; if(pid != null && sid != null) { - /*Identifier newId = new Identifier(); - newId.setValue("peggym.130.5"); - if(pid.getValue().equals("peggym.130.4") && HazelcastClientFactory.getSystemMetadataMap().get(newId) != null) { - isHead =false; - } else if (pid.getValue().equals("peggym.130.4") && HazelcastClientFactory.getSystemMetadataMap().get(newId) == null) { - isHead = true; - }*/ Identifier head = null; try { - head = SeriesIdResolver.getPid(sid);//if the passed sid actually is a pid, the method will return the pid. + //if the passed sid actually is a pid, the method will return the pid. + head = SeriesIdResolver.getPid(sid); } catch (Exception e) { - System.out.println(""+e.getStackTrace()); isHead = true; } if(head != null ) { - //System.out.println("||||||||||||||||||| the head version is "+ head.getValue()+" for sid "+sid.getValue()); - logger.info("||||||||||||||||||| the head version is "+ head.getValue()+" for sid "+sid.getValue()); + + logger.info("||||||||||||||||||| the head version is " + head.getValue() + + " for sid " + sid.getValue()); if(head.equals(pid)) { - logger.info("||||||||||||||||||| the pid "+ pid.getValue()+" is the head version for sid "+sid.getValue()); + logger.info("||||||||||||||||||| the pid " + pid.getValue() + + " is the head version for sid " + sid.getValue()); isHead=true; } else { - logger.info("||||||||||||||||||| the pid "+ pid.getValue()+" is NOT the head version for sid "+sid.getValue()); + logger.info("||||||||||||||||||| the pid " + pid.getValue() + + " is NOT the head version for sid " + sid.getValue()); isHead=false; } } else { 
- //System.out.println("||||||||||||||||||| can't find the head version for sid "+sid.getValue()); - logger.info("||||||||||||||||||| can't find the head version for sid "+sid.getValue() + " and we think the given pid "+pid.getValue()+" is the head version."); + logger.info("||||||||||||||||||| can't find the head version for sid " + + sid.getValue() + " and we think the given pid " + pid.getValue() + + " is the head version."); } } return isHead; } - private SolrDoc _mergeMappedReference(ResourceEntry resourceEntry, SolrDoc mergeDocument) throws InvalidToken, NotAuthorized, NotImplemented, - ServiceFailure, NotFound, InstantiationException, IllegalAccessException, IOException, MarshallingException { - - Identifier identifier = new Identifier(); - identifier.setValue(mergeDocument.getIdentifier()); - //SystemMetadata sysMeta = HazelcastClientFactory.getSystemMetadataMap().get(identifier); - String relativeObjPath = null; //we don't know the path - SystemMetadata sysMeta = ObjectManager.getInstance().getSystemMetadata(identifier.getValue(), relativeObjPath); - if (sysMeta.getSeriesId() != null && sysMeta.getSeriesId().getValue() != null && !sysMeta.getSeriesId().getValue().trim().equals("")) { - // skip this one - if(!isHeadVersion(identifier, sysMeta.getSeriesId())) { - //System.out.println("The id "+identifier+" is not the head of the serial id "+sysMeta.getSeriesId().getValue()+" So, skip merge this one!!!!!!!!!!!!!!!!!!!!!!"+mergeDocument.getIdentifier()); - logger.info("The id "+identifier+" is not the head of the serial id "+sysMeta.getSeriesId().getValue()+" So, skip merge this one!!!!!!!!!!!!!!!!!!!!!!"+mergeDocument.getIdentifier()); - return mergeDocument; - } - - } - + private SolrDoc _mergeMappedReference(ResourceEntry resourceEntry, SolrDoc mergeDocument) + throws InvalidToken, NotAuthorized, NotImplemented, + ServiceFailure, NotFound, InstantiationException, + IllegalAccessException, IOException, MarshallingException { + + Identifier identifier = new 
Identifier(); + identifier.setValue(mergeDocument.getIdentifier()); + try { + SystemMetadata sysMeta = (SystemMetadata) ObjectManager.getInstance() + .getSystemMetadata(identifier.getValue()); + if (sysMeta.getSeriesId() != null && sysMeta.getSeriesId().getValue() != null + && !sysMeta.getSeriesId().getValue().trim().equals("")) { + // skip this one + if(!isHeadVersion(identifier, sysMeta.getSeriesId())) { + logger.info("The id " + identifier + " is not the head of the serial id " + + sysMeta.getSeriesId().getValue() + + " So, skip merge this one!!!!!!!!!!!!!!!!!!!!!!" + + mergeDocument.getIdentifier()); + return mergeDocument; + } + } + } catch (ClassCastException e) { + logger.warn("The systemmetadata is a v1 object and we need to do nothing"); + } + + if (mergeDocument.hasField(SolrElementField.FIELD_ID) == false) { mergeDocument.addField(new SolrElementField(SolrElementField.FIELD_ID, resourceEntry .getIdentifier())); @@ -362,19 +345,22 @@ public List mergeIndexedDocuments(List docs) { List mergedDocuments = new ArrayList(); for (ResourceEntry resourceEntry : this.resourceMap.values()) { for (SolrDoc doc : docs) { - //System.out.println(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the doc id is "+doc.getIdentifier() +" in the thread "+Thread.currentThread().getId()); - //System.out.println(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the doc series id is "+doc.getSeriesId()+" in the thread "+Thread.currentThread().getId()); - //System.out.println(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the resource entry id is "+resourceEntry.getIdentifier()+" in the thread "+Thread.currentThread().getId()); - logger.debug(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the doc id is "+doc.getIdentifier() +" in the thread "+Thread.currentThread().getId()); - logger.debug(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the doc series id is "+doc.getSeriesId()+" in the thread "+Thread.currentThread().getId()); - 
logger.debug(">>>>>>>>in mergeIndexedDocuments of ForesiteResourceMap, the resource entry id is "+resourceEntry.getIdentifier()+" in the thread "+Thread.currentThread().getId()); + + logger.debug("in mergeIndexedDocuments of ForesiteResourceMap, the doc id is " + + doc.getIdentifier() + " in the thread "+Thread.currentThread().getId()); + logger.debug("in mergeIndexedDocuments of ForesiteResourceMap, the doc series id is " + + doc.getSeriesId() + " in the thread "+Thread.currentThread().getId()); + logger.debug("in mergeIndexedDocuments of ForesiteResourceMap, the resource entry id is " + + resourceEntry.getIdentifier() + " in the thread " + + Thread.currentThread().getId()); if (doc.getIdentifier().equals(resourceEntry.getIdentifier()) || resourceEntry.getIdentifier().equals(doc.getSeriesId())) { try { mergedDocuments.add(_mergeMappedReference(resourceEntry, doc)); } catch (Exception e) { - logger.error("ForestieResourceMap.mergeIndexedDocuments - cannot merge the document since " + e.getMessage()); + logger.error("ForestieResourceMap.mergeIndexedDocuments - cannot merge the document since " + + e.getMessage()); } } From e1e354fb07b79664ace2e5d59c1fbd2a8485b5db Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 25 Jun 2024 14:32:19 -0700 Subject: [PATCH 06/21] Used the new method to get the system metadata. 
--- .../cn/indexer/parser/utility/SeriesIdResolver.java | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java b/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java index e921a055..ca5fdec7 100644 --- a/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java +++ b/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java @@ -100,19 +100,12 @@ public static boolean isSeriesId(Identifier identifier) InstantiationException, IllegalAccessException, IOException, MarshallingException { // if we have system metadata available via HZ map, then it's a PID - String relativeObjPath = null;//we don't know the path - SystemMetadata systemMetadata = - ObjectManager.getInstance().getSystemMetadata(identifier.getValue(), relativeObjPath); + org.dataone.service.types.v1.SystemMetadata systemMetadata = ObjectManager.getInstance() + .getSystemMetadata(identifier.getValue()); if (systemMetadata != null) { return false; } - //TODO: check that it's not just bogus value by looking up the pid? -// Identifier pid = getPid(identifier); -// if (pid.equals(identifier)) { -// return false; -// } - // okay, it's a SID return true; From 186e739bb0ab25fd468b9421526757676b8511aa Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 25 Jun 2024 15:45:02 -0700 Subject: [PATCH 07/21] Changed the code to get the system metadata. 
--- .../resourcemap/IndexVisibilityDelegateImpl.java | 12 ++++-------- .../org/dataone/cn/indexer/solrhttp/SolrDoc.java | 2 +- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java index 1f375ba6..1ad05400 100644 --- a/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java +++ b/src/main/java/org/dataone/cn/indexer/resourcemap/IndexVisibilityDelegateImpl.java @@ -12,7 +12,7 @@ import org.dataone.service.exceptions.NotImplemented; import org.dataone.service.exceptions.ServiceFailure; import org.dataone.service.types.v1.Identifier; -import org.dataone.service.types.v2.SystemMetadata; +import org.dataone.service.types.v1.SystemMetadata; public class IndexVisibilityDelegateImpl implements IndexVisibilityDelegate { @@ -25,10 +25,8 @@ public IndexVisibilityDelegateImpl() { public boolean isDocumentVisible(Identifier pid) { boolean visible = false; try { - - //SystemMetadata systemMetadata = HazelcastClientFactory.getSystemMetadataMap().get(pid); - String relativeObjPath = null; //we don't know the path - SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue(), relativeObjPath); + SystemMetadata systemMetadata = ObjectManager.getInstance() + .getSystemMetadata(pid.getValue()); // TODO: Is pid Identifier a SID? 
if (systemMetadata == null) { return true; @@ -63,9 +61,7 @@ public boolean isDocumentVisible(Identifier pid) { public boolean documentExists(Identifier pid) { boolean exists = false; try { - //SystemMetadata systemMetadata = HazelcastClientFactory.getSystemMetadataMap().get(pid); - String relativeObjPath = null; //we don't know the path - SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue(), relativeObjPath); + SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue()); if (systemMetadata != null) { exists = true; } else { diff --git a/src/main/java/org/dataone/cn/indexer/solrhttp/SolrDoc.java b/src/main/java/org/dataone/cn/indexer/solrhttp/SolrDoc.java index 01271732..20d3ba06 100644 --- a/src/main/java/org/dataone/cn/indexer/solrhttp/SolrDoc.java +++ b/src/main/java/org/dataone/cn/indexer/solrhttp/SolrDoc.java @@ -28,7 +28,7 @@ import java.util.List; import org.apache.commons.io.IOUtils; -import org.dataone.service.types.v2.SystemMetadata; +import org.dataone.service.types.v1.SystemMetadata; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; From ad2dd13721f40be921140e1a0f111755c89cbd38 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Tue, 25 Jun 2024 16:51:35 -0700 Subject: [PATCH 08/21] Use the hashstore method to get system metadata and object. 
--- .../org/dataone/cn/indexer/SolrIndex.java | 78 +++++++------------ .../cn/indexer/object/ObjectManager.java | 22 +++++- .../resourcemap/ForesiteResourceMap.java | 3 +- .../IndexVisibilityDelegateImpl.java | 5 ++ 4 files changed, 53 insertions(+), 55 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/SolrIndex.java b/src/main/java/org/dataone/cn/indexer/SolrIndex.java index cf8ace04..3ccdfce3 100644 --- a/src/main/java/org/dataone/cn/indexer/SolrIndex.java +++ b/src/main/java/org/dataone/cn/indexer/SolrIndex.java @@ -24,6 +24,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.net.MalformedURLException; import java.util.ArrayList; import java.util.Collection; @@ -169,10 +170,9 @@ public void setDeleteSubprocessors( /** * Generate the index for the given information - * @param id - * @param systemMetadata - * @param dataStream - * @return + * @param id the id which will be indexed + * @param isSystemetaChange if this is a change on the system metadata only + * @return a map of solr doc with ids * @throws IOException * @throws SAXException * @throws ParserConfigurationException @@ -184,8 +184,7 @@ public void setDeleteSubprocessors( * @throws NotFound * @throws NotImplemented */ - private Map process(String id, SystemMetadata systemMetadata, - String objectPath, boolean isSysmetaChangeOnly) + private Map process(String id, boolean isSysmetaChangeOnly) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException, MarshallingException, EncoderException, SolrServerException, NotImplemented, NotFound, UnsupportedType{ @@ -193,10 +192,7 @@ private Map process(String id, SystemMetadata systemMetadata, long start = System.currentTimeMillis(); Map docs = new HashMap(); // Load the System Metadata document - ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); - 
TypeMarshaller.marshalTypeToOutputStream(systemMetadata, systemMetadataOutputStream); - ByteArrayInputStream systemMetadataStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); - try { + try (InputStream systemMetadataStream = ObjectManager.getInstance().getSystemMetadataStream(id)){ docs = systemMetadataProcessor.processDocument(id, docs, systemMetadataStream); } catch (Exception e) { log.error(e.getMessage(), e); @@ -219,12 +215,11 @@ private Map process(String id, SystemMetadata systemMetadata, + ". Even though this is a systemmetadata-change-only event, we can NOT " + "just reindex the systemmeta only."); } - } - log.debug("SolrIndex.process - the value of skipOtherProcessors is " + skipOtherProcessor + - " and the object path is " + objectPath + " for the id " + id); + log.debug("SolrIndex.process - the value of skipOtherProcessors is " + skipOtherProcessor + + " for the id " + id); //if the objectPath is null, we should skip the other processes - if (!skipOtherProcessor && objectPath != null) { + if (!skipOtherProcessor) { log.debug("SolrIndex.process - Start to use subprocessor list to process " + id); // Determine if subprocessors are available for this ID if (subprocessors != null) { @@ -234,31 +229,21 @@ private Map process(String id, SystemMetadata systemMetadata, if (subprocessor.canProcess(formatId)) { // if so, then extract the additional information from the // document. - try { + try (InputStream dataStream = ObjectManager.getInstance().getObject(id)) { // docObject = the resource map document or science // metadata document. // note that resource map processing touches all objects // referenced by the resource map. 
- FileInputStream dataStream = new FileInputStream(objectPath); - if (!dataStream.getFD().valid()) { - log.error("SolrIndex.process - subprocessor " - + subprocessor.getClass().getName() - + " couldn't process since it could not load OBJECT file for ID,Path=" - + id + ", " + objectPath); - //throw new Exception("Could not load OBJECT for ID " + id ); - } else { - start = System.currentTimeMillis(); - docs = subprocessor.processDocument(id, docs, dataStream); - end = System.currentTimeMillis(); - log.info("SolrIndex.process - the time for calling processDocument " - + "for the subprocessor " + subprocessor.getClass().getName() - +" for the pid " + id + " is " + (end-start) + "milliseconds."); - log.debug("SolrIndex.process - subprocessor " - + subprocessor.getClass().getName() - +" generated solr doc for id "+id); - } + start = System.currentTimeMillis(); + docs = subprocessor.processDocument(id, docs, dataStream); + end = System.currentTimeMillis(); + log.info("SolrIndex.process - the time for calling processDocument " + + "for the subprocessor " + subprocessor.getClass().getName() + +" for the pid " + id + " is " + (end-start) + "milliseconds."); + log.debug("SolrIndex.process - subprocessor " + + subprocessor.getClass().getName() + +" generated solr doc for id "+id); } catch (Exception e) { - e.printStackTrace(); log.error(e.getMessage(), e); throw new SolrServerException(e.getMessage()); } @@ -365,19 +350,13 @@ private boolean isSystemMetadataField(String fieldName) { /** * Check the parameters of the insert or update methods. 
- * @param pid - * @param systemMetadata - * @param data + * @param pid the pid which will be indexed * @throws SolrServerException */ - private void checkParams(Identifier pid, SystemMetadata systemMetadata, String objectPath) throws InvalidRequest { + private void checkParams(Identifier pid) throws InvalidRequest { if(pid == null || pid.getValue() == null || pid.getValue().trim().equals("")) { throw new InvalidRequest("0000", "The identifier of the indexed document should not be null or blank."); } - if(systemMetadata == null) { - throw new InvalidRequest("0000", "The system metadata of the indexed document " - + pid.getValue() + " should not be null."); - } } /** @@ -393,15 +372,14 @@ private void checkParams(Identifier pid, SystemMetadata systemMetadata, String o * @throws NotImplemented * @throws InvalidRequest */ - private void insert(Identifier pid, SystemMetadata systemMetadata, - String objectPath, boolean isSysmetaChangeOnly) + private void insert(Identifier pid, boolean isSysmetaChangeOnly) throws IOException, SAXException, ParserConfigurationException, InvalidRequest, XPathExpressionException, SolrServerException, MarshallingException, EncoderException, NotImplemented, NotFound, UnsupportedType { - checkParams(pid, systemMetadata, objectPath); + checkParams(pid); log.debug("SolrIndex.insert - trying to insert the solrDoc for object "+pid.getValue()); long start = System.currentTimeMillis(); - Map docs = process(pid.getValue(), systemMetadata, objectPath, isSysmetaChangeOnly); + Map docs = process(pid.getValue(), isSysmetaChangeOnly); long end = System.currentTimeMillis(); log.info("SolrIndex.insert - the subprocessor processing time of " + pid.getValue() + " is " + (end-start) + " milliseconds."); @@ -471,11 +449,8 @@ public void update(Identifier pid, String relativePath, boolean isSysmetaChangeO InstantiationException, IllegalAccessException { log.debug("SolrIndex.update - trying to update(insert or remove) solr index of object " + pid.getValue()); - 
String objectPath = null; - SystemMetadata systemMetadata = ObjectManager.getInstance().getSystemMetadata(pid.getValue(), relativePath); - objectPath = ObjectManager.getInstance().getFilePath(relativePath, systemMetadata.getFormatId().getValue()); try { - insert(pid, systemMetadata, objectPath, isSysmetaChangeOnly); + insert(pid, isSysmetaChangeOnly); } catch (SolrServerException e) { if (e.getMessage().contains(VERSION_CONFLICT) && VERSION_CONFLICT_MAX_ATTEMPTS > 0) { log.info("SolrIndex.update - Indexer grabbed an older verion (version conflict) of " @@ -484,8 +459,7 @@ public void update(Identifier pid, String relativePath, boolean isSysmetaChangeO for (int i=0; i Date: Wed, 26 Jun 2024 11:48:10 -0700 Subject: [PATCH 09/21] Removed an unused method. --- .../parser/utility/SeriesIdResolver.java | 29 ------------------- 1 file changed, 29 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java b/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java index ca5fdec7..ca181ed6 100644 --- a/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java +++ b/src/main/java/org/dataone/cn/indexer/parser/utility/SeriesIdResolver.java @@ -80,35 +80,6 @@ public static Identifier getPid(Identifier identifier) return pid; } - /** - * Check if the given identifier is a PID or a SID - * - * @param identifier - * @return true if the identifier is a SID, false if a PID - * @throws NotFound - * @throws ServiceFailure - * @throws NotImplemented - * @throws NotAuthorized - * @throws InvalidToken - * @throws MarshallingException - * @throws IOException - * @throws IllegalAccessException - * @throws InstantiationException - */ - public static boolean isSeriesId(Identifier identifier) - throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound, - InstantiationException, IllegalAccessException, IOException, MarshallingException { - - // if we have system metadata available via HZ map, then it's
a PID - org.dataone.service.types.v1.SystemMetadata systemMetadata = ObjectManager.getInstance() - .getSystemMetadata(identifier.getValue()); - if (systemMetadata != null) { - return false; - } - - // okay, it's a SID - return true; - } } From c2b27cb01bc6f6ef0c771735e8401a03ae497e1b Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 26 Jun 2024 12:04:21 -0700 Subject: [PATCH 10/21] Deleted two unused methods. --- .../cn/index/DataONESolrJettyTestBase.java | 43 +------------------ 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java index 57ad01ae..ede9ed93 100644 --- a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java +++ b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java @@ -1,25 +1,3 @@ -/** - * This work was created by participants in the DataONE project, and is - * jointly copyrighted by participating institutions in DataONE. For - * more information on DataONE, see our web site at http://dataone.org. - * - * Copyright ${year} - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - * $Id$ - */ - package org.dataone.cn.index; import java.io.File; @@ -102,7 +80,7 @@ protected void indexObjectToSolr(String identifier, Resource objectFile) throws pid.setValue(identifier); solrIndexService.update(pid, relativePath, isSysmetaChangeOnly); } - + /** * Delete the given identifier from the solr server * @param identifier @@ -125,25 +103,6 @@ protected void deleteSolrDoc(String identifier) throws XPathExpressionException, solrIndexService.remove(pid); } - protected void addEmlToSolrIndex(Resource sysMetaFile) throws Exception { - SolrIndex indexService = solrIndexService; - SystemMetadata smd = TypeMarshaller.unmarshalTypeFromStream(SystemMetadata.class, - sysMetaFile.getInputStream()); - // path to actual science metadata document - String path = StringUtils.remove(sysMetaFile.getFile().getPath(), File.separator + "SystemMetadata"); - boolean isSysmetaChangeOnly = false; - indexService.update(smd.getIdentifier(), path, isSysmetaChangeOnly); - - } - - protected void addSysAndSciMetaToSolrIndex(Resource sysMeta, Resource sciMeta) throws Exception { - SolrIndex indexService = solrIndexService; - SystemMetadata smd = TypeMarshaller.unmarshalTypeFromStream(SystemMetadata.class, - sysMeta.getInputStream()); - String path = sciMeta.getFile().getAbsolutePath(); - boolean isSysmetaChangeOnly = false; - indexService.update(smd.getIdentifier(), path, isSysmetaChangeOnly); - } protected SolrDocument assertPresentInSolrIndex(String pid) throws SolrServerException, IOException { From f1441b6a7d135fe38833cee8a7f2398f155b9ac6 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 26 Jun 2024 13:33:05 -0700 Subject: [PATCH 11/21] Added a save method for the test classes. Fixed a bug to use storage in the hashstore implementation. 
--- .../org/dataone/indexer/storage/HashStorage.java | 10 ++++++++-- .../java/org/dataone/indexer/storage/Storage.java | 14 ++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/dataone/indexer/storage/HashStorage.java b/src/main/java/org/dataone/indexer/storage/HashStorage.java index 2bf632f0..0aaf2771 100644 --- a/src/main/java/org/dataone/indexer/storage/HashStorage.java +++ b/src/main/java/org/dataone/indexer/storage/HashStorage.java @@ -73,14 +73,20 @@ public static HashStorage getInstance(String className) throws IOException { public InputStream retrieveObject(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { - return hashStorage.retrieveObject(pid); + return hashStore.retrieveObject(pid); } @Override public InputStream retrieveSystemMetadata(String pid) throws IllegalArgumentException, FileNotFoundException, IOException, NoSuchAlgorithmException { - return hashStorage.retrieveSystemMetadata(pid); + return hashStore.retrieveMetadata(pid); + } + + @Override + public void storeObject(InputStream object, String pid) throws NoSuchAlgorithmException, + IOException,RuntimeException, InterruptedException { + hashStore.storeObject(object, pid, null, null, null, -1); } } diff --git a/src/main/java/org/dataone/indexer/storage/Storage.java b/src/main/java/org/dataone/indexer/storage/Storage.java index e138d976..bfe5077b 100644 --- a/src/main/java/org/dataone/indexer/storage/Storage.java +++ b/src/main/java/org/dataone/indexer/storage/Storage.java @@ -5,6 +5,8 @@ import java.io.InputStream; import java.security.NoSuchAlgorithmException; +import org.dataone.hashstore.exceptions.PidRefsFileExistsException; + /** * The Storage represents the interface to access the objects and system metadata @@ -38,4 +40,16 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, public InputStream retrieveSystemMetadata(String pid) throws IllegalArgumentException, 
FileNotFoundException, IOException, NoSuchAlgorithmException; + /** + * Store the input stream object into hash store. This method is only for the test classes. + * @param object the input stream of the object + * @param pid the pid which will be stored + * @throws NoSuchAlgorithmException + * @throws IOException + * @throws RuntimeException + * @throws InterruptedException + */ + public void storeObject(InputStream object, String pid) throws NoSuchAlgorithmException, + IOException,RuntimeException, InterruptedException; + } From e643ae458d703a9858bb48249638f3f80967224a Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 26 Jun 2024 13:57:12 -0700 Subject: [PATCH 12/21] Added the hashstore properties. --- .../org/dataone/configuration/index-processor.properties | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/test/resources/org/dataone/configuration/index-processor.properties b/src/test/resources/org/dataone/configuration/index-processor.properties index 05cb1b1e..256dd8d8 100644 --- a/src/test/resources/org/dataone/configuration/index-processor.properties +++ b/src/test/resources/org/dataone/configuration/index-processor.properties @@ -42,3 +42,12 @@ index.resourcemap.namespace=http://www.w3.org/TR/rdf-syntax-grammar;http://www.o dataone.mn.registration.serviceType.url=https://cn-sandbox-ucsb-1.test.dataone.org/mnServiceTypes.xml cn.router.hostname=cn.dataone.org + +# Storage properties +storage.className=org.dataone.hashstore.filehashstore.FileHashStore +storage.hashstore.rootDirectory=./target/hashstore +storage.hashstore.defaultNamespace=https://ns.dataone.org/service/types/v2.0#SystemMetadata +# The following three properties must NOT be modified after the hash store is initialized +storage.hashstore.fileNameAlgorithm=SHA-256 +storage.hashstore.directory.width=2 +storage.hashstore.directory.depth=3 From 03ed1ba82f87805526c7a74203f729ec12af2c56 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 26 Jun 2024 13:59:08 -0700 Subject: [PATCH 13/21] Added
the code to store objects into hash store. --- .../java/org/dataone/cn/index/DataONESolrJettyTestBase.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java index ede9ed93..ef2a6091 100644 --- a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java +++ b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java @@ -2,6 +2,7 @@ import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; @@ -33,6 +34,7 @@ import org.dataone.cn.indexer.parser.ISolrField; import org.dataone.cn.indexer.solrhttp.SolrElementField; import org.dataone.configuration.Settings; +import org.dataone.indexer.storage.StorageFactory; import org.dataone.service.exceptions.NotFound; import org.dataone.service.exceptions.NotImplemented; import org.dataone.service.exceptions.ServiceFailure; @@ -78,6 +80,10 @@ protected void indexObjectToSolr(String identifier, Resource objectFile) throws String relativePath = objectFile.getFile().getPath(); Identifier pid = new Identifier(); pid.setValue(identifier); + // Save the object into hashstore + try (InputStream object = objectFile.getInputStream()) { + StorageFactory.getStorage().storeObject(object, identifier); + } solrIndexService.update(pid, relativePath, isSysmetaChangeOnly); } From 32bec87d59db9f829a778f447e4fc9f78464095a Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 26 Jun 2024 14:57:21 -0700 Subject: [PATCH 14/21] Added the method to store system metadata. This method is only for the test classes. 
--- .../org/dataone/indexer/storage/HashStorage.java | 7 +++++++ .../java/org/dataone/indexer/storage/Storage.java | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/dataone/indexer/storage/HashStorage.java b/src/main/java/org/dataone/indexer/storage/HashStorage.java index 0aaf2771..63795e31 100644 --- a/src/main/java/org/dataone/indexer/storage/HashStorage.java +++ b/src/main/java/org/dataone/indexer/storage/HashStorage.java @@ -89,4 +89,11 @@ public void storeObject(InputStream object, String pid) throws NoSuchAlgorithmEx hashStore.storeObject(object, pid, null, null, null, -1); } + @Override + public void storeMetadata(InputStream metadata, String pid) throws IOException, + IllegalArgumentException, FileNotFoundException, + InterruptedException, NoSuchAlgorithmException { + hashStore.storeMetadata(metadata, pid); + } + } diff --git a/src/main/java/org/dataone/indexer/storage/Storage.java b/src/main/java/org/dataone/indexer/storage/Storage.java index bfe5077b..ada10334 100644 --- a/src/main/java/org/dataone/indexer/storage/Storage.java +++ b/src/main/java/org/dataone/indexer/storage/Storage.java @@ -43,7 +43,7 @@ public InputStream retrieveSystemMetadata(String pid) throws IllegalArgumentExce /** * Store the input stream object into hash store. This method is only for the test classes. * @param object the input stream of the object - * @param pid the pid which will be stored + * @param pid the identifier of the object which will be stored * @throws NoSuchAlgorithmException * @throws IOException * @throws RuntimeException @@ -52,4 +52,17 @@ public InputStream retrieveSystemMetadata(String pid) throws IllegalArgumentExce public void storeObject(InputStream object, String pid) throws NoSuchAlgorithmException, IOException,RuntimeException, InterruptedException; + /** + * Store the system metadata into hash store. This method is only for the test classes. 
+ * @param metadata the input stream of the sytem metadata + * @param pid the identifier of the system metadata + * @throws IOException + * @throws IllegalArgumentException + * @throws FileNotFoundException + * @throws InterruptedException + * @throws NoSuchAlgorithmException + */ + public void storeMetadata(InputStream metadata, String pid) throws IOException, + IllegalArgumentException, FileNotFoundException, + InterruptedException, NoSuchAlgorithmException; } From d4b70892121a4fed99f9e1f392a2c6e47ee8f62c Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 26 Jun 2024 15:34:55 -0700 Subject: [PATCH 15/21] Added the code to put the objects and system metadata into hashstore. --- .../cn/index/DataONESolrJettyTestBase.java | 50 +++++++++++++++++-- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java index ef2a6091..064e0969 100644 --- a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java +++ b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java @@ -1,6 +1,8 @@ package org.dataone.cn.index; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.text.DateFormat; @@ -68,7 +70,8 @@ public abstract class DataONESolrJettyTestBase extends SolrJettyTestBase { private SolrIndex solrIndexService; private int solrPort = Settings.getConfiguration().getInt("test.solr.port", 8985); private static final String DEFAULT_SOL_RHOME = "solr8home"; - + private static final String SYSTEMMETA_FILE_NAME = "systemmetadata.xml"; + /** * Index the given object into solr * @param identifier the identifier of the object which needs to be indexed @@ -78,15 +81,52 @@ public abstract class DataONESolrJettyTestBase extends SolrJettyTestBase { protected void indexObjectToSolr(String identifier, Resource objectFile) throws Exception 
{ boolean isSysmetaChangeOnly = false; String relativePath = objectFile.getFile().getPath(); + try { + StorageFactory.getStorage().retrieveObject(identifier); + } catch (FileNotFoundException e) { + // The pid is not in the hash store and we need to save the object into hashstore + try (InputStream object = objectFile.getInputStream()) { + StorageFactory.getStorage().storeObject(object, identifier); + } + File sysmetaFile = getSysmetaFile(relativePath); + if (sysmetaFile != null) { + try (InputStream sysmeta = new FileInputStream(sysmetaFile)) { + StorageFactory.getStorage().storeMetadata(sysmeta, identifier); + } + } + } Identifier pid = new Identifier(); pid.setValue(identifier); - // Save the object into hashstore - try (InputStream object = objectFile.getInputStream()) { - StorageFactory.getStorage().storeObject(object, identifier); - } solrIndexService.update(pid, relativePath, isSysmetaChangeOnly); } + /** + * The convention method to get the system metadata file path from the objectPath. + * We assume the object and system metadata file are in the same directory. + * The system metadata file has a fixed name - systemmetadata.xml + * @param relativeObjPath the relative path of the object + * @return the file of system metadata. If it is null, this means the system metadata file does not exist. + */ + private static File getSysmetaFile(String relativeObjPath) { + File sysmetaFile = null; + String sysmetaPath = null; + String relativeSysmetaPath = null; + if (relativeObjPath != null) { + if (relativeObjPath.contains(File.separator)) { + relativeSysmetaPath = relativeObjPath.substring(0, + relativeObjPath.lastIndexOf(File.separator) + 1) + SYSTEMMETA_FILE_NAME; + } else { + // There is not path information in the object path ( it only has the file name). 
+ // So we just simply return systemmetadata.xml + relativeSysmetaPath = SYSTEMMETA_FILE_NAME; + } + } + if (relativeSysmetaPath != null) { + sysmetaFile = new File(relativeSysmetaPath); + } + return sysmetaFile; + } + /** * Delete the given identifier from the solr server * @param identifier From f810a6daccfd83e984558db86caaa87d442fdf64 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 26 Jun 2024 18:23:24 -0700 Subject: [PATCH 16/21] Modified the ObjectManagerTest class to use the new methods. --- .../cn/indexer/object/ObjectManager.java | 1 - .../cn/index/SolrFieldXPathFgdcTest.java | 4 +- .../cn/indexer/object/ObjectManagerTest.java | 154 ++++++++---------- .../fgdc/nasa_d_FEDGPS1293Sysmeta.xml | 4 +- 4 files changed, 71 insertions(+), 92 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 631d0f59..8da964de 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -69,7 +69,6 @@ public class ObjectManager { private static Storage storage = null; private static final String TOKEN_VARIABLE_NAME = "DATAONE_AUTH_TOKEN"; private static final String TOKEN_FILE_PATH_PROP_NAME = "dataone.nodeToken.file"; - private static final String SYSTEMMETA_FILE_NAME = "systemmetadata.xml"; private static MultipartD1Node d1Node = null; private static Session session = null; diff --git a/src/test/java/org/dataone/cn/index/SolrFieldXPathFgdcTest.java b/src/test/java/org/dataone/cn/index/SolrFieldXPathFgdcTest.java index 4e317dd1..3ab0d6fc 100644 --- a/src/test/java/org/dataone/cn/index/SolrFieldXPathFgdcTest.java +++ b/src/test/java/org/dataone/cn/index/SolrFieldXPathFgdcTest.java @@ -296,8 +296,8 @@ public static void setUp() throws Exception { fgdcNasaExpected.put("mediaTypeProperty", ""); fgdcNasaExpected.put("formatId", "FGDC-STD-001.1-1999"); fgdcNasaExpected.put("formatType", 
"METADATA"); - fgdcNasaExpected.put("size", "14880"); - fgdcNasaExpected.put("checksum", "c72ff66bbe7fa99e5fb399bab8cb6f85"); + fgdcNasaExpected.put("size", "14828"); + fgdcNasaExpected.put("checksum", "1755a557c13be7af44d676bb09274b0e"); fgdcNasaExpected.put("checksumAlgorithm", "MD5"); fgdcNasaExpected.put("submitter", "CN=Dave Vieglais T799,O=Google,C=US,DC=cilogon,DC=org"); fgdcNasaExpected.put("rightsHolder", diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java index b50c6e5c..32cc9b12 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java @@ -1,33 +1,25 @@ -/** - * This work was created by participants in the DataONE project, and is - * jointly copyrighted by participating institutions in DataONE. For - * more information on DataONE, see our web site at http://dataone.org. - * - * Copyright 2022 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ package org.dataone.cn.indexer.object; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; + +import java.io.InputStream; +import java.io.OutputStream; +import java.security.MessageDigest; -import java.nio.file.Paths; +import javax.xml.bind.DatatypeConverter; -import org.dataone.cn.index.DataONESolrJettyTestBase; -import org.dataone.service.exceptions.NotFound; + +import org.dataone.indexer.storage.StorageFactory; +import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v2.SystemMetadata; +import org.dataone.service.util.TypeMarshaller; +import org.junit.Before; import org.junit.Test; /** @@ -36,73 +28,61 @@ * */ public class ObjectManagerTest { - - /** - * Test the getFilePath method - * @throws Exception - */ - @Test - public void testgetFilePath() throws Exception { - ObjectManager manager = ObjectManager.getInstance(); - String path = null; - String format = "eml://ecoinformatics.org/eml-2.0.1"; - String resultPath = manager.getFilePath(path, format); - assertTrue(resultPath == null); - format = "image/bmp"; - resultPath = manager.getFilePath(path, format); - assertTrue(resultPath == null); - - path = ""; - format = "eml://ecoinformatics.org/eml-2.0.1"; - resultPath = manager.getFilePath(path, format); - assertTrue(resultPath == null); - format = "image/bmp"; - resultPath = manager.getFilePath(path, format); - assertTrue(resultPath == null); - - path = "/var/metacat/documents/foo.1.1"; - format = "eml://ecoinformatics.org/eml-2.0.1"; - resultPath = manager.getFilePath(path, format); - assertTrue(resultPath.equals("//var/metacat/documents/foo.1.1")); - - path = "/var/metacat/documents/foo.2.1"; - format = "image/bmp";; - resultPath = manager.getFilePath(path, format); - 
assertTrue(resultPath.equals("//var/metacat/documents/foo.2.1")); + + private String identifier; + + @Before + public void setUp() throws Exception { + identifier = "ObjectManagerTest-" + System.currentTimeMillis(); + File objectFile = new File("src/test/resources/org/dataone/cn/index/resources/d1_testdocs/" + + "fgdc/nasa_d_FEDGPS1293.xml"); + try (InputStream object = new FileInputStream(objectFile)) { + StorageFactory.getStorage().storeObject(object, identifier); + } + File sysmetaFile = new File("src/test/resources/org/dataone/cn/index/resources/" + + "d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml"); + try (InputStream sysmetaStream = new FileInputStream(sysmetaFile)) { + SystemMetadata sysmeta = TypeMarshaller + .unmarshalTypeFromStream(SystemMetadata.class, sysmetaStream); + Identifier pid = new Identifier(); + pid.setValue(identifier); + sysmeta.setIdentifier(pid); + try (ByteArrayOutputStream output = new ByteArrayOutputStream()) { + TypeMarshaller.marshalTypeToOutputStream(sysmeta, output); + try (ByteArrayInputStream input = new ByteArrayInputStream(output.toByteArray())) { + StorageFactory.getStorage().storeMetadata(input, identifier); + } + } + } } - + /** - * Test the getSystemMetadata method + * Test the getObject and getSystemMetadata method * @throws Exception */ - @Test - public void testGetSystemMetadata() throws Exception { - //Test to get system metadata from a file - String currentDir = Paths.get(".").toAbsolutePath().normalize().toString(); - System.out.println("current dir " + currentDir); - String path = currentDir + "/src/test/resources/org/dataone/cn/index/resources/d1_testdocs/json-ld/hakai-deep-schema/hakai-deep-schema.jsonld"; - String id = "hakai-deep-schema.jsonld"; - SystemMetadata sysmeta = ObjectManager.getInstance().getSystemMetadata(id, path); - assertTrue(sysmeta.getIdentifier().getValue().equals(id)); - - //Test to get system metadata from the Mock dataone cn server. 
- id = "ala-wai-canal-ns02-matlab-processing.eml.1.xml"; - path = null; - MockMNode mockMNode = new MockMNode("http://mnode.foo"); - mockMNode.setContext(DataONESolrJettyTestBase.getContext()); - ObjectManager.setD1Node(mockMNode); - sysmeta = ObjectManager.getInstance().getSystemMetadata(id, path); - assertTrue(sysmeta.getIdentifier().getValue().equals(id)); - - //Test the system metadata not found - id = "foo.1.1"; - path = "foo1"; - try { - sysmeta = ObjectManager.getInstance().getSystemMetadata(id, path); - fail("We should reach here"); - } catch (NotFound e) { - assert(true); + @Test + public void testGetObjectAndSystemMetadata() throws Exception { + try (InputStream input = ObjectManager.getInstance().getObject(identifier)) { + assertNotNull(input); + try (OutputStream os = new ByteArrayOutputStream()) { + MessageDigest md5 = MessageDigest.getInstance("MD5"); + // Calculate hex digests + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = input.read(buffer)) != -1) { + os.write(buffer, 0, bytesRead); + md5.update(buffer, 0, bytesRead); + } + String md5Digest = DatatypeConverter.printHexBinary(md5.digest()).toLowerCase(); + assertEquals("1755a557c13be7af44d676bb09274b0e", md5Digest); + } } + org.dataone.service.types.v1.SystemMetadata sysmeta = ObjectManager.getInstance() + .getSystemMetadata(identifier); + assertEquals(identifier, sysmeta.getIdentifier().getValue()); + assertEquals("1755a557c13be7af44d676bb09274b0e", sysmeta.getChecksum().getValue()); + assertEquals(14828, sysmeta.getSize().intValue()); } + } diff --git a/src/test/resources/org/dataone/cn/index/resources/d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml b/src/test/resources/org/dataone/cn/index/resources/d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml index 42998531..9b0dfbd6 100644 --- a/src/test/resources/org/dataone/cn/index/resources/d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml +++ 
b/src/test/resources/org/dataone/cn/index/resources/d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml @@ -9,8 +9,8 @@ 22 www.nbii.gov_metadata_mdata_NASA_nasa_d_FEDGPS1293 FGDC-STD-001.1-1999 - 14880 - c72ff66bbe7fa99e5fb399bab8cb6f85 + 14828 + 1755a557c13be7af44d676bb09274b0e CN=Dave Vieglais T799,O=Google,C=US,DC=cilogon,DC=org CN=Dave Vieglais T799,O=Google,C=US,DC=cilogon,DC=org From 8d0f35968f9d3ec6a504639121f80425a45da46b Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Fri, 28 Jun 2024 09:55:22 -0700 Subject: [PATCH 17/21] Removed the object path in the messages of RabbitMQ. --- .../org/dataone/cn/indexer/IndexWorker.java | 7 +- .../org/dataone/cn/indexer/SolrIndex.java | 4 +- .../queue/IndexQueueMessageParser.java | 55 +++--------- .../cn/index/DataONESolrJettyTestBase.java | 2 +- .../queue/IndexQueueMessageParserTest.java | 90 ++++++++----------- 5 files changed, 54 insertions(+), 104 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/IndexWorker.java b/src/main/java/org/dataone/cn/indexer/IndexWorker.java index 1cbcab65..ab3bc7ef 100644 --- a/src/main/java/org/dataone/cn/indexer/IndexWorker.java +++ b/src/main/java/org/dataone/cn/indexer/IndexWorker.java @@ -379,7 +379,6 @@ private void indexOjbect(IndexQueueMessageParser parser, long deliveryTag, boole Identifier pid = parser.getIdentifier(); String indexType = parser.getIndexType(); int priority = parser.getPriority(); - String finalFilePath = parser.getObjectPath(); try { // Send the acknowledge back to RabbitMQ before processing index. // This is a temporary solution for the RabbitMQ timeout issue.
@@ -397,14 +396,13 @@ private void indexOjbect(IndexQueueMessageParser parser, long deliveryTag, boole + ", with the thread id " + threadId + " - Received the index task from the index queue with the identifier: " + pid.getValue() + " , the index type: " + indexType - + ", the file path (null means not to have): " + finalFilePath + ", the priotity: " + priority); if (indexType.equals(CREATE_INDEXT_TYPE)) { boolean sysmetaOnly = false; - solrIndex.update(pid, finalFilePath, sysmetaOnly); + solrIndex.update(pid, sysmetaOnly); } else if (indexType.equals(SYSMETA_CHANGE_TYPE)) { boolean sysmetaOnly = true; - solrIndex.update(pid, finalFilePath, sysmetaOnly); + solrIndex.update(pid, sysmetaOnly); } else if (indexType.equals(DELETE_INDEX_TYPE)) { solrIndex.remove(pid); } else { @@ -416,7 +414,6 @@ private void indexOjbect(IndexQueueMessageParser parser, long deliveryTag, boole logger.info("IndexWorker.indexOjbect with the thread id " + threadId + " - Completed the index task from the index queue with the identifier: " + pid.getValue() + " , the index type: " + indexType - + ", the file path (null means not to have): " + finalFilePath + ", the priotity: " + priority + " and the time taking is " + (end-start) + " milliseconds"); diff --git a/src/main/java/org/dataone/cn/indexer/SolrIndex.java b/src/main/java/org/dataone/cn/indexer/SolrIndex.java index 3ccdfce3..b07211e0 100644 --- a/src/main/java/org/dataone/cn/indexer/SolrIndex.java +++ b/src/main/java/org/dataone/cn/indexer/SolrIndex.java @@ -423,6 +423,8 @@ private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException * remove the index for the previous version(s) and generate new index for the doc. * 2. Add a new doc - if the system metadata shows the value of the archive is false, generate the * index for the doc. 
+ * @param pid the identifier of object which will be indexed + * @param isSysmetaChangeOnly the flag indicating if the change is system metadata only * @throws NotFound * @throws ServiceFailure * @throws NotImplemented @@ -441,7 +443,7 @@ private void insertToIndex(SolrDoc doc) throws SolrServerException, IOException * @throws IllegalAccessException * @throws InstantiationException */ - public void update(Identifier pid, String relativePath, boolean isSysmetaChangeOnly) + public void update(Identifier pid, boolean isSysmetaChangeOnly) throws InvalidToken, NotAuthorized, NotImplemented, ServiceFailure, NotFound, XPathExpressionException, UnsupportedType, SAXException, ParserConfigurationException, SolrServerException, MarshallingException, diff --git a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java index 81a725da..a3a18831 100644 --- a/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java +++ b/src/main/java/org/dataone/indexer/queue/IndexQueueMessageParser.java @@ -1,23 +1,3 @@ -/** - * This work was created by participants in the DataONE project, and is - * jointly copyrighted by participating institutions in DataONE. For - * more information on DataONE, see our web site at http://dataone.org. - * - * Copyright 2022 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ package org.dataone.indexer.queue; import java.util.Map; @@ -36,12 +16,12 @@ * */ public class IndexQueueMessageParser { - private final static String HEADER_ID = "id"; //The header name in the message to store the identifier - private final static String HEADER_PATH = "path"; //The header name in the message to store the path of the object - private final static String HEADER_INDEX_TYPE = "index_type"; //The header name in the message to store the index type - + //The header name in the message to store the identifier + private final static String HEADER_ID = "id"; + //The header name in the message to store the index type + private final static String HEADER_INDEX_TYPE = "index_type"; + private Identifier identifier = null; - private String objectPath = null; private String indexType = null; private int priority = 1; @@ -55,11 +35,13 @@ public class IndexQueueMessageParser { */ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRequest { if(properties == null) { - throw new InvalidRequest("0000", "The properties, which contains the index task info, cannot be null in the index queue message."); + throw new InvalidRequest("0000", "The properties, which contains the index task info, " + + "cannot be null in the index queue message."); } Map headers = properties.getHeaders(); if(headers == null) { - throw new InvalidRequest("0000", "The header of the properties, which contains the index task info, cannot be null in the index queue message."); + throw new InvalidRequest("0000", "The header of the properties, which contains the " + + "index task info, cannot be null in the index queue message."); } Object pidObj = headers.get(HEADER_ID); if (pidObj == null) { @@ -72,7 +54,7 @@ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRe logger.debug("IndexQueueMessageParser.parse - the identifier in the message is " + pid); identifier = new Identifier(); identifier.setValue(pid); - + Object typeObj = 
headers.get(HEADER_INDEX_TYPE); if (typeObj == null) { throw new InvalidRequest("0000", "The index type cannot be null in the index queue message."); @@ -82,12 +64,7 @@ public void parse(AMQP.BasicProperties properties, byte[] body) throws InvalidRe throw new InvalidRequest("0000", "The index type cannot be null or blank in the index queue message."); } logger.debug("IndexQueueMessageParser.parse - the index type in the message is " + indexType); - - Object pathObject = headers.get(HEADER_PATH); - if (pathObject != null) { - objectPath = ((LongString)pathObject).toString(); - } - logger.debug("IndexQueueMessageParser.parse - the file path of the object which be indexed in the message is " + objectPath); + try { priority = properties.getPriority(); } catch (NullPointerException e) { @@ -105,16 +82,6 @@ public Identifier getIdentifier() { return identifier; } - /** - * Get the file path of the object, which will be indexed, - * after calling the parse method to parse the index queue message. - * @return the file path of the object. It can be null or blank, which - * means we don't have the object in the system. - */ - public String getObjectPath() { - return objectPath; - } - /** * Get the type of the index task after calling the parse method to parse the index queue message. * @return the type of the index task. It can be create, delete or sysmeta. 
diff --git a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java index 064e0969..0dbd8d29 100644 --- a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java +++ b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java @@ -97,7 +97,7 @@ protected void indexObjectToSolr(String identifier, Resource objectFile) throws } Identifier pid = new Identifier(); pid.setValue(identifier); - solrIndexService.update(pid, relativePath, isSysmetaChangeOnly); + solrIndexService.update(pid, isSysmetaChangeOnly); } /** diff --git a/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java b/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java index b6a5c1cc..201a55f7 100644 --- a/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java +++ b/src/test/java/org/dataone/indexer/queue/IndexQueueMessageParserTest.java @@ -1,5 +1,6 @@ package org.dataone.indexer.queue; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -19,10 +20,11 @@ * */ public class IndexQueueMessageParserTest { - private final static String HEADER_ID = "id"; //The header name in the message to store the identifier - private final static String HEADER_PATH = "path"; //The header name in the message to store the path of the object - private final static String HEADER_INDEX_TYPE = "index_type"; //The header name in the message to store the index type - + //The header name in the message to store the identifier + private final static String HEADER_ID = "id"; + //The header name in the message to store the index type + private final static String HEADER_INDEX_TYPE = "index_type"; + /** * Test the invalid messages * @throws Exception @@ -32,54 +34,50 @@ public void testInvalidRequest() throws Exception { LongString id = null; LongString index_type = LongStringHelper.asLongString("create"); int 
priority = 1; - LongString filePath = LongStringHelper.asLongString("foo"); - AMQP.BasicProperties properties = generateProperties(id, index_type, priority, filePath); + AMQP.BasicProperties properties = generateProperties(id, index_type, priority); byte[] body = null; IndexQueueMessageParser parser = new IndexQueueMessageParser(); try { parser.parse(properties, body); fail("Since the idenitifer is null, we shoulder get here"); } catch (InvalidRequest e) { - + } - + id = LongStringHelper.asLongString(" "); index_type = LongStringHelper.asLongString("create"); priority = 1; - filePath = LongStringHelper.asLongString("foo"); - properties = generateProperties(id, index_type, priority, filePath); + properties = generateProperties(id, index_type, priority); try { parser.parse(properties, body); fail("Since the idenitifer is null, we shouldn't get here"); } catch (InvalidRequest e) { - + } - + id = LongStringHelper.asLongString("foo"); index_type = null; priority = 1; - filePath = LongStringHelper.asLongString("foo"); - properties = generateProperties(id, index_type, priority, filePath); + properties = generateProperties(id, index_type, priority); try { parser.parse(properties, body); fail("Since the index type is null, we shouldn't get here"); } catch (InvalidRequest e) { - + } - + id = LongStringHelper.asLongString("foo"); index_type = LongStringHelper.asLongString(""); priority = 1; - filePath = LongStringHelper.asLongString("foo"); - properties = generateProperties(id, index_type, priority, filePath); + properties = generateProperties(id, index_type, priority); try { parser.parse(properties, body); fail("Since the index type is null, we shouldn't get here"); } catch (InvalidRequest e) { - + } } - + /** * Test valid messages * @throws Exception @@ -89,62 +87,51 @@ public void testParse() throws Exception { String id = "doi:10.5063/F1HX1B4Q"; String indexType = "create"; int priority = 1; - String filePath = "/var/metacat/12dfad"; LongString longId = 
LongStringHelper.asLongString(id); LongString longIndexType = LongStringHelper.asLongString(indexType); - LongString longFilePath = LongStringHelper.asLongString(filePath); - AMQP.BasicProperties properties = generateProperties(longId, longIndexType, priority, longFilePath); + AMQP.BasicProperties properties = generateProperties(longId, longIndexType, priority); byte[] body = null; IndexQueueMessageParser parser = new IndexQueueMessageParser(); parser.parse(properties, body); - assertTrue(parser.getIdentifier().getValue().equals(id)); - assertTrue(parser.getIndexType().equals(indexType)); - assertTrue(parser.getPriority() == priority); - assertTrue(parser.getObjectPath().equals(filePath)); - + assertEquals(id, parser.getIdentifier().getValue()); + assertEquals(indexType, parser.getIndexType()); + assertEquals(priority, parser.getPriority()); + id = "urn:uuid:45298965-f867-440c-841f-91d3abd729b7"; indexType = "delete"; priority = 2; - filePath = ""; longId = LongStringHelper.asLongString(id); longIndexType = LongStringHelper.asLongString(indexType); - longFilePath = LongStringHelper.asLongString(filePath); - properties = generateProperties(longId, longIndexType, priority, longFilePath); + properties = generateProperties(longId, longIndexType, priority); parser = new IndexQueueMessageParser(); parser.parse(properties, body); - assertTrue(parser.getIdentifier().getValue().equals(id)); - assertTrue(parser.getIndexType().equals(indexType)); - assertTrue(parser.getPriority() == priority); - assertTrue(parser.getObjectPath().equals(filePath)); + assertEquals(id, parser.getIdentifier().getValue()); + assertEquals(indexType, parser.getIndexType()); + assertEquals(priority, parser.getPriority()); id = "test-foo"; indexType = "sysmeta"; priority = 10; - filePath = "c:\\foo\\abc"; longId = LongStringHelper.asLongString(id); longIndexType = LongStringHelper.asLongString(indexType); - longFilePath = LongStringHelper.asLongString(filePath); - properties = 
generateProperties(longId, longIndexType, priority, longFilePath); + properties = generateProperties(longId, longIndexType, priority); parser = new IndexQueueMessageParser(); parser.parse(properties, body); - assertTrue(parser.getIdentifier().getValue().equals(id)); - assertTrue(parser.getIndexType().equals(indexType)); - assertTrue(parser.getPriority() == priority); - assertTrue(parser.getObjectPath().equals(filePath)); - + assertEquals(id, parser.getIdentifier().getValue()); + assertEquals(indexType, parser.getIndexType()); + assertEquals(priority, parser.getPriority()); + id = "test-foo2"; indexType = "sysmeta2"; priority = 10; longId = LongStringHelper.asLongString(id); longIndexType = LongStringHelper.asLongString(indexType); - longFilePath = null; - properties = generateProperties(longId, longIndexType, priority, longFilePath); + properties = generateProperties(longId, longIndexType, priority); parser = new IndexQueueMessageParser(); parser.parse(properties, body); - assertTrue(parser.getIdentifier().getValue().equals(id)); - assertTrue(parser.getIndexType().equals(indexType)); - assertTrue(parser.getPriority() == priority); - assertTrue(parser.getObjectPath() == null); + assertEquals(id, parser.getIdentifier().getValue()); + assertEquals(indexType, parser.getIndexType()); + assertEquals(priority, parser.getPriority()); } /** @@ -155,13 +142,10 @@ public void testParse() throws Exception { * @param filePath * @return */ - private AMQP.BasicProperties generateProperties(LongString id, LongString index_type, int priority, LongString filePath) { + private AMQP.BasicProperties generateProperties(LongString id, LongString index_type, int priority) { Map headers = new HashMap(); headers.put(HEADER_ID, id); headers.put(HEADER_INDEX_TYPE, index_type); - if (filePath != null) { - headers.put(HEADER_PATH, filePath); - } AMQP.BasicProperties basicProperties = new AMQP.BasicProperties.Builder() .contentType("text/plain") .deliveryMode(2) // set this message to persistent 
From 1a0355a0c331bf64133ca75146b0dfcc7f67d518 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Mon, 12 Aug 2024 11:11:15 -0700 Subject: [PATCH 18/21] Modified the code based on the reviewer's suggestion. --- .../cn/indexer/object/ObjectManager.java | 97 ++++--------------- 1 file changed, 21 insertions(+), 76 deletions(-) diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 8da964de..52f86c25 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -1,23 +1,3 @@ -/** - * This work was created by participants in the DataONE project, and is - * jointly copyrighted by participating institutions in DataONE. For - * more information on DataONE, see our web site at http://dataone.org. - * - * Copyright 2022 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * - */ package org.dataone.cn.indexer.object; import java.io.ByteArrayInputStream; @@ -25,8 +5,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.nio.file.FileSystems; -import java.nio.file.Files; import java.security.NoSuchAlgorithmException; import org.apache.commons.io.FileUtils; @@ -36,7 +14,6 @@ import org.dataone.client.exception.ClientSideException; import org.dataone.client.rest.HttpMultipartRestClient; import org.dataone.client.rest.MultipartRestClient; -import org.dataone.client.v2.formats.ObjectFormatCache; import org.dataone.client.v2.impl.MultipartCNode; import org.dataone.client.v2.impl.MultipartD1Node; import org.dataone.client.v2.impl.MultipartMNode; @@ -51,7 +28,6 @@ import org.dataone.service.exceptions.ServiceFailure; import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v1.Session; -import org.dataone.service.types.v2.ObjectFormat; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.util.TypeMarshaller; @@ -73,6 +49,14 @@ public class ObjectManager { private static MultipartD1Node d1Node = null; private static Session session = null; + static { + try { + manager = new ObjectManager(); + } catch (ServiceFailure | IOException e) { + logger.error("Metacat cannot initialize the ObjectManager class since " + e.getMessage()); + } + } + /** * Private constructor @@ -82,14 +66,10 @@ public class ObjectManager { */ private ObjectManager() throws ServiceFailure, IllegalArgumentException, IOException { if (storage == null) { - if (storage == null) { - storage = StorageFactory.getStorage(); - } + storage = StorageFactory.getStorage(); } if (d1Node == null) { - if (d1Node == null) { - refreshD1Node(); - } + refreshD1Node(); } else { logger.info("ObjectManager ---NOT going to create the d1node with the url " + nodeBaseURL + " since the ObjectManager already was assigned a d1node with the url " @@ -106,13 +86,6 @@ private ObjectManager() throws 
ServiceFailure, IllegalArgumentException, IOExcep */ public static ObjectManager getInstance() throws ServiceFailure, IllegalArgumentException, IOException { - if (manager == null) { - synchronized (ObjectManager.class) { - if (manager == null) { - manager = new ObjectManager(); - } - } - } return manager; } @@ -127,22 +100,18 @@ public static ObjectManager getInstance() throws ServiceFailure, * @throws NotFound * @throws MarshallingException * @throws IOException - * @throws IllegalAccessException - * @throws InstantiationException * @throws NoSuchAlgorithmException */ public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAuthorized, - NotImplemented, ServiceFailure, NotFound, InstantiationException, - NoSuchAlgorithmException, IllegalAccessException, IOException, - MarshallingException { + NotImplemented, ServiceFailure, NotFound, + NoSuchAlgorithmException, IOException, MarshallingException { long start = System.currentTimeMillis(); //try to get the system metadata from the storage system first InputStream sysmetaInputStream = null; try { sysmetaInputStream = storage.retrieveSystemMetadata(id); long end = System.currentTimeMillis(); - logger.info("ObjectManager.getSystemMetadata - finish getting the system metadata via " - + "the file system for the pid " + id + logger.info("Finish getting the system metadata via the file system for the pid " + id + " and it took " + (end - start) + "milliseconds"); } catch (FileNotFoundException exception ) { // Metacat can't find the system metadata from the storage system. 
@@ -150,41 +119,18 @@ public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAu SystemMetadata sysmeta = null; Identifier identifier = new Identifier(); identifier.setValue(id); - try { - for (int i=0; i<5; i++) { - try { - sysmeta = d1Node.getSystemMetadata(session, identifier); - break; - } catch (ServiceFailure ee) { - logger.warn("The DataONE api call doesn't get the system metadata since " - + ee.getMessage() + ". This is " + i - + " try and Indexer will try again."); - try { - Thread.sleep(300); - } catch (InterruptedException ie) { - logger.info("The sleep of the thread was interrupted."); - } - continue; - } - } - logger.debug("ObjectManager.getSystemMetadata - finish getting the system metadata " - + "via the DataONE API call for the pid " + id); - } catch (NotAuthorized e) { - logger.info("ObjectManager.getSystemMetadata - failed to get the system metadata " - + "via the DataONE API call for the pid " + id - + " since it is not authorized. We will refresh the token and try again"); - refreshD1Node(); - sysmeta = d1Node.getSystemMetadata(session, identifier); - } + sysmeta = d1Node.getSystemMetadata(session, identifier); + logger.debug("Finish getting the system metadata via the DataONE API call for the pid " + + id); if (sysmeta != null) { ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); - sysmetaInputStream = new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); + sysmetaInputStream = + new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); } long end = System.currentTimeMillis(); - logger.info("ObjectManager.getSystemMetadata - finish getting the system metadata via " - + "DataONE API for the pid " + id + " and it took " - + (end - start) + "milliseconds"); + logger.info("Finish getting the system metadata via DataONE API for the pid " + id + + " and it took " + (end - start) + "milliseconds"); } 
return sysmetaInputStream; } @@ -318,8 +264,7 @@ private Session createSession(String authToken) { * @param serviceUrl the service URL for the node we are connecting to * @return a DataONE MultipartCNode object * @throws ClientSideException - * @throws IOException - * @throws MetadigException + * @throws IOException */ private MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) throws IOException, ClientSideException { MultipartRestClient mrc = null; From 4d6825983e19eb079dafdb0a14678a3204a4b646 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Mon, 12 Aug 2024 11:57:57 -0700 Subject: [PATCH 19/21] Deleted the StorageFactory class and the Storage interface. Renamed the HashStorage class to Storage. --- .../cn/indexer/object/ObjectManager.java | 3 +- .../dataone/indexer/storage/HashStorage.java | 99 ------------------- .../org/dataone/indexer/storage/Storage.java | 94 +++++++++++++++--- .../indexer/storage/StorageFactory.java | 28 ------ .../cn/indexer/object/ObjectManagerTest.java | 6 +- 5 files changed, 85 insertions(+), 145 deletions(-) delete mode 100644 src/main/java/org/dataone/indexer/storage/HashStorage.java delete mode 100644 src/main/java/org/dataone/indexer/storage/StorageFactory.java diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 52f86c25..4f0310eb 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -20,7 +20,6 @@ import org.dataone.configuration.Settings; import org.dataone.exceptions.MarshallingException; import org.dataone.indexer.storage.Storage; -import org.dataone.indexer.storage.StorageFactory; import org.dataone.service.exceptions.InvalidToken; import org.dataone.service.exceptions.NotAuthorized; import org.dataone.service.exceptions.NotFound; @@ -66,7 +65,7 @@ public class ObjectManager { */ private ObjectManager() throws ServiceFailure, 
IllegalArgumentException, IOException { if (storage == null) { - storage = StorageFactory.getStorage(); + storage = Storage.getInstance(); } if (d1Node == null) { refreshD1Node(); diff --git a/src/main/java/org/dataone/indexer/storage/HashStorage.java b/src/main/java/org/dataone/indexer/storage/HashStorage.java deleted file mode 100644 index 63795e31..00000000 --- a/src/main/java/org/dataone/indexer/storage/HashStorage.java +++ /dev/null @@ -1,99 +0,0 @@ -package org.dataone.indexer.storage; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.security.NoSuchAlgorithmException; -import java.util.Properties; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.dataone.configuration.Settings; -import org.dataone.hashstore.HashStore; -import org.dataone.hashstore.HashStoreFactory; -import org.dataone.hashstore.exceptions.HashStoreFactoryException; - -/** - * The HashFileStore implementation of the Storage interface - */ -public class HashStorage implements Storage { - - private static Log logMetacat = LogFactory.getLog(HashStorage.class); - private static HashStorage hashStorage; - private HashStore hashStore; - - /** - * Private constructor - * @param className the name of the implementation class - * @throws IOException - * @throws HashStoreFactoryException - */ - private HashStorage(String className) throws HashStoreFactoryException, IOException { - String rootPath = Settings.getConfiguration().getString("storage.hashstore.rootDirectory"); - if (rootPath == null) { - throw new HashStoreFactoryException("HashStorage.constructor - The HashStore root path " - + " is null or blank from the property of storage.hashstore.rootDirectory"); - } - String directoryDepth = Settings.getConfiguration() - .getString("storage.hashstore.directory.depth", "3"); - String directoryNameWidth = Settings.getConfiguration() - .getString("storage.hashstore.directory.width", "2"); - 
String fileNameAlgorithm = Settings.getConfiguration() - .getString("storage.hashstore.fileNameAlgorithm", "SHA-256"); - String defaultNamespace = Settings.getConfiguration() - .getString("storage.hashstore.defaultNamespace", - "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); - Properties storeProperties = new Properties(); - storeProperties.setProperty("storePath", rootPath); - storeProperties.setProperty("storeDepth", directoryDepth); - storeProperties.setProperty("storeWidth", directoryNameWidth); - storeProperties.setProperty("storeAlgorithm", fileNameAlgorithm); - storeProperties.setProperty("storeMetadataNamespace", defaultNamespace); - hashStore = HashStoreFactory.getHashStore(className, storeProperties); - } - - /** - * Get the instance of the class through the singleton pattern - * @param className the name of the implementation class - * @return the instance of the class - * @throws IOException - */ - public static HashStorage getInstance(String className) throws IOException { - if(hashStorage == null) { - synchronized(HashStorage.class) { - if (hashStorage == null) { - hashStorage = new HashStorage(className); - } - } - } - return hashStorage; - } - - @Override - public InputStream retrieveObject(String pid) - throws IllegalArgumentException, FileNotFoundException, IOException, - NoSuchAlgorithmException { - return hashStore.retrieveObject(pid); - } - - @Override - public InputStream retrieveSystemMetadata(String pid) - throws IllegalArgumentException, FileNotFoundException, IOException, - NoSuchAlgorithmException { - return hashStore.retrieveMetadata(pid); - } - - @Override - public void storeObject(InputStream object, String pid) throws NoSuchAlgorithmException, - IOException,RuntimeException, InterruptedException { - hashStore.storeObject(object, pid, null, null, null, -1); - } - - @Override - public void storeMetadata(InputStream metadata, String pid) throws IOException, - IllegalArgumentException, FileNotFoundException, - 
InterruptedException, NoSuchAlgorithmException { - hashStore.storeMetadata(metadata, pid); - } - -} diff --git a/src/main/java/org/dataone/indexer/storage/Storage.java b/src/main/java/org/dataone/indexer/storage/Storage.java index ada10334..3eee9496 100644 --- a/src/main/java/org/dataone/indexer/storage/Storage.java +++ b/src/main/java/org/dataone/indexer/storage/Storage.java @@ -4,17 +4,74 @@ import java.io.IOException; import java.io.InputStream; import java.security.NoSuchAlgorithmException; +import java.util.Properties; -import org.dataone.hashstore.exceptions.PidRefsFileExistsException; - +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.dataone.configuration.Settings; +import org.dataone.hashstore.HashStore; +import org.dataone.hashstore.HashStoreFactory; +import org.dataone.hashstore.exceptions.HashStoreFactoryException; /** - * The Storage represents the interface to access the objects and system metadata + * The HashFileStore implementation of the Storage interface */ -public interface Storage { +public class Storage { + + private static Log log = LogFactory.getLog(Storage.class); + private static Storage instance; + private static HashStore hashStore; + static { + try { + instance = new Storage(); + } catch (IOException e) { + log.error( + "Dataone-indexer cannot initialize the Storage class since " + e.getMessage()); + } + } + + /** + * Private constructor + * @throws IOException + * @throws HashStoreFactoryException + */ + private Storage() throws HashStoreFactoryException, IOException { + String className = Settings.getConfiguration().getString("storage.className"); + String rootPath = Settings.getConfiguration().getString("storage.hashstore.rootDirectory"); + if (rootPath == null) { + throw new HashStoreFactoryException("HashStorage.constructor - The HashStore root path " + + " is null or blank from the property of storage.hashstore.rootDirectory"); + } + String directoryDepth = 
Settings.getConfiguration() + .getString("storage.hashstore.directory.depth", "3"); + String directoryNameWidth = Settings.getConfiguration() + .getString("storage.hashstore.directory.width", "2"); + String fileNameAlgorithm = Settings.getConfiguration() + .getString("storage.hashstore.fileNameAlgorithm", "SHA-256"); + String defaultNamespace = Settings.getConfiguration() + .getString("storage.hashstore.defaultNamespace", + "https://ns.dataone.org/service/types/v2.0#SystemMetadata"); + Properties storeProperties = new Properties(); + storeProperties.setProperty("storePath", rootPath); + storeProperties.setProperty("storeDepth", directoryDepth); + storeProperties.setProperty("storeWidth", directoryNameWidth); + storeProperties.setProperty("storeAlgorithm", fileNameAlgorithm); + storeProperties.setProperty("storeMetadataNamespace", defaultNamespace); + hashStore = HashStoreFactory.getHashStore(className, storeProperties); + } + + /** + * Get the instance of the class through the singleton pattern + * @return the instance of the class + * @throws IOException + */ + public static Storage getInstance() throws IOException { + return instance; + } + /** * Returns an InputStream to an object from HashStore using a given persistent identifier. 
- * + * * @param pid Authority-based identifier * @return Object InputStream * @throws IllegalArgumentException When pid is null or empty @@ -23,8 +80,11 @@ public interface Storage { * @throws NoSuchAlgorithmException When algorithm used to calculate object address is not * supported */ - public InputStream retrieveObject(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + public InputStream retrieveObject(String pid) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException { + return hashStore.retrieveObject(pid); + } /** * Returns an InputStream to the system metadata content of a given pid @@ -37,8 +97,11 @@ public InputStream retrieveObject(String pid) throws IllegalArgumentException, * @throws NoSuchAlgorithmException When algorithm used to calculate metadata address is not * supported */ - public InputStream retrieveSystemMetadata(String pid) throws IllegalArgumentException, - FileNotFoundException, IOException, NoSuchAlgorithmException; + public InputStream retrieveSystemMetadata(String pid) + throws IllegalArgumentException, FileNotFoundException, IOException, + NoSuchAlgorithmException { + return hashStore.retrieveMetadata(pid); + } /** * Store the input stream object into hash store. This method is only for the test classes. @@ -50,11 +113,13 @@ public InputStream retrieveSystemMetadata(String pid) throws IllegalArgumentExce * @throws InterruptedException */ public void storeObject(InputStream object, String pid) throws NoSuchAlgorithmException, - IOException,RuntimeException, InterruptedException; + IOException,RuntimeException, InterruptedException { + hashStore.storeObject(object, pid, null, null, null, -1); + } /** * Store the system metadata into hash store. This method is only for the test classes. 
- * @param metadata the input stream of the sytem metadata + * @param metadata the input stream of the system metadata * @param pid the identifier of the system metadata * @throws IOException * @throws IllegalArgumentException @@ -63,6 +128,9 @@ public void storeObject(InputStream object, String pid) throws NoSuchAlgorithmEx * @throws NoSuchAlgorithmException */ public void storeMetadata(InputStream metadata, String pid) throws IOException, - IllegalArgumentException, FileNotFoundException, - InterruptedException, NoSuchAlgorithmException; + IllegalArgumentException, FileNotFoundException, + InterruptedException, NoSuchAlgorithmException { + hashStore.storeMetadata(metadata, pid); + } + } diff --git a/src/main/java/org/dataone/indexer/storage/StorageFactory.java b/src/main/java/org/dataone/indexer/storage/StorageFactory.java deleted file mode 100644 index fcbca1f1..00000000 --- a/src/main/java/org/dataone/indexer/storage/StorageFactory.java +++ /dev/null @@ -1,28 +0,0 @@ -package org.dataone.indexer.storage; - -import java.io.IOException; - -import org.dataone.configuration.Settings; - -/** - * The factory class to create a Storage instance - */ -public class StorageFactory { - - /** - * Get the Storage implementation instance - * @return the Storage class instance - * @throws IOException - * @throws ServiceException - */ - public static Storage getStorage() throws IOException, IllegalArgumentException{ - String className = Settings.getConfiguration().getString("storage.className"); - if (className != null && className.startsWith("org.dataone.hashstore")) { - return HashStorage.getInstance(className); - } else { - throw new IllegalArgumentException("StorageFactory.getStorage - Unrecognized the " - + " storage class " + className - + ". 
So Indexer can't initialize the storage system."); - } - } -} diff --git a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java index 32cc9b12..120286b9 100644 --- a/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java +++ b/src/test/java/org/dataone/cn/indexer/object/ObjectManagerTest.java @@ -15,7 +15,7 @@ import javax.xml.bind.DatatypeConverter; -import org.dataone.indexer.storage.StorageFactory; +import org.dataone.indexer.storage.Storage; import org.dataone.service.types.v1.Identifier; import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.util.TypeMarshaller; @@ -37,7 +37,7 @@ public void setUp() throws Exception { File objectFile = new File("src/test/resources/org/dataone/cn/index/resources/d1_testdocs/" + "fgdc/nasa_d_FEDGPS1293.xml"); try (InputStream object = new FileInputStream(objectFile)) { - StorageFactory.getStorage().storeObject(object, identifier); + Storage.getInstance().storeObject(object, identifier); } File sysmetaFile = new File("src/test/resources/org/dataone/cn/index/resources/" + "d1_testdocs/fgdc/nasa_d_FEDGPS1293Sysmeta.xml"); @@ -50,7 +50,7 @@ public void setUp() throws Exception { try (ByteArrayOutputStream output = new ByteArrayOutputStream()) { TypeMarshaller.marshalTypeToOutputStream(sysmeta, output); try (ByteArrayInputStream input = new ByteArrayInputStream(output.toByteArray())) { - StorageFactory.getStorage().storeMetadata(input, identifier); + Storage.getInstance().storeMetadata(input, identifier); } } } From 48c63a4b2e9ba52bbf734aa188ae1b327bbeca1a Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Mon, 12 Aug 2024 12:38:04 -0700 Subject: [PATCH 20/21] Modified the code based on the change on the storage class. 
--- .../dataone/cn/index/DataONESolrJettyTestBase.java | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java index 0dbd8d29..e0addfb3 100644 --- a/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java +++ b/src/test/java/org/dataone/cn/index/DataONESolrJettyTestBase.java @@ -19,7 +19,6 @@ import org.apache.commons.codec.EncoderException; import org.apache.commons.collections.CollectionUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.solr.SolrJettyTestBase; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.SolrServerException; @@ -36,15 +35,13 @@ import org.dataone.cn.indexer.parser.ISolrField; import org.dataone.cn.indexer.solrhttp.SolrElementField; import org.dataone.configuration.Settings; -import org.dataone.indexer.storage.StorageFactory; +import org.dataone.indexer.storage.Storage; import org.dataone.service.exceptions.NotFound; import org.dataone.service.exceptions.NotImplemented; import org.dataone.service.exceptions.ServiceFailure; import org.dataone.service.exceptions.UnsupportedType; import org.dataone.service.types.v1.Identifier; -import org.dataone.service.types.v2.SystemMetadata; import org.dataone.service.util.DateTimeMarshaller; -import org.dataone.service.util.TypeMarshaller; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; @@ -82,16 +79,16 @@ protected void indexObjectToSolr(String identifier, Resource objectFile) throws boolean isSysmetaChangeOnly = false; String relativePath = objectFile.getFile().getPath(); try { - StorageFactory.getStorage().retrieveObject(identifier); + Storage.getInstance().retrieveObject(identifier); } catch (FileNotFoundException e) { // The pid is not in the hash store and we need to save the object into hashstore try (InputStream object = 
objectFile.getInputStream()) { - StorageFactory.getStorage().storeObject(object, identifier); + Storage.getInstance().storeObject(object, identifier); } File sysmetaFile = getSysmetaFile(relativePath); if (sysmetaFile != null) { try (InputStream sysmeta = new FileInputStream(sysmetaFile)) { - StorageFactory.getStorage().storeMetadata(sysmeta, identifier); + Storage.getInstance().storeMetadata(sysmeta, identifier); } } } From 9bc801c2c9b37851753d880253d27dafc4950752 Mon Sep 17 00:00:00 2001 From: Jing Tao Date: Wed, 14 Aug 2024 16:00:34 -0700 Subject: [PATCH 21/21] Made changes based on the reviewer's suggestion. --- helm/config/dataone-indexer.properties | 9 +++ .../cn/indexer/object/ObjectManager.java | 71 ++++++++----------- .../org/dataone/indexer/storage/Storage.java | 6 +- 3 files changed, 41 insertions(+), 45 deletions(-) diff --git a/helm/config/dataone-indexer.properties b/helm/config/dataone-indexer.properties index c51ce0aa..003cca15 100644 --- a/helm/config/dataone-indexer.properties +++ b/helm/config/dataone-indexer.properties @@ -40,3 +40,12 @@ index.resourcemap.waitingComponent.time={{ default 800 .Values.idxworker.resourc index.resourcemap.waitingComponent.max.attempts={{ default 25 .Values.idxworker.resourcemapMaxTries }} index.solr.versionConflict.waiting.time={{ default 1000 .Values.idxworker.solrVerConflictWaitMs }} index.solr.versionConflict.max.attempts={{ default 50 .Values.idxworker.solrVerConflictMaxTries }} + +# Storage properties +storage.className={{ default "org.dataone.hashstore.filehashstore.FileHashStore" .Values.idxworker.storage.hashStoreClassName }} +storage.hashstore.rootDirectory={{ default "./target/hashstore" .Values.idxworker.storage.hashStoreRootDir }} +storage.hashstore.defaultNamespace={{ default "https://ns.dataone.org/service/types/v2.0#SystemMetadata" .Values.idxworker.storage.hashStoreDefaultNamespace }} +# The following three properties must NOT be modified after the hash store is initialized
+storage.hashstore.fileNameAlgorithm={{ default "SHA-256" .Values.idxworker.storage.hashStoreAlgorithm }} +storage.hashstore.directory.width={{ default 2 .Values.idxworker.storage.hashStoreDirWidth }} +storage.hashstore.directory.depth={{ default 3 .Values.idxworker.storage.hashStoreDirDepth }} diff --git a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java index 4f0310eb..df67f0f7 100644 --- a/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java +++ b/src/main/java/org/dataone/cn/indexer/object/ObjectManager.java @@ -50,41 +50,26 @@ public class ObjectManager { static { try { - manager = new ObjectManager(); - } catch (ServiceFailure | IOException e) { - logger.error("Metacat cannot initialize the ObjectManager class since " + e.getMessage()); + refreshD1Node(); + } catch (ServiceFailure e) { + logger.warn("Metacat cannot initialize the d1Node since " + e.getMessage()); } + storage = Storage.getInstance(); + manager = new ObjectManager(); } /** * Private constructor - * @throws ServiceFailure - * @throws IOException - * @throws IllegalArgumentException */ - private ObjectManager() throws ServiceFailure, IllegalArgumentException, IOException { - if (storage == null) { - storage = Storage.getInstance(); - } - if (d1Node == null) { - refreshD1Node(); - } else { - logger.info("ObjectManager ---NOT going to create the d1node with the url " + nodeBaseURL - + " since the ObjectManager already was assigned a d1node with the url " - + d1Node.getNodeBaseServiceUrl()); - } + private ObjectManager() { } /** * Get an ObjectManager instance through the singleton pattern. 
* @return the instance of ObjectManager - * @throws ServiceFailure - * @throws IOException - * @throws IllegalArgumentException */ - public static ObjectManager getInstance() throws ServiceFailure, - IllegalArgumentException, IOException { + public static ObjectManager getInstance() { return manager; } @@ -113,23 +98,25 @@ public InputStream getSystemMetadataStream(String id) throws InvalidToken, NotAu logger.info("Finish getting the system metadata via the file system for the pid " + id + " and it took " + (end - start) + "milliseconds"); } catch (FileNotFoundException exception ) { - // Metacat can't find the system metadata from the storage system. - // So try to get it from the dataone api - SystemMetadata sysmeta = null; - Identifier identifier = new Identifier(); - identifier.setValue(id); - sysmeta = d1Node.getSystemMetadata(session, identifier); - logger.debug("Finish getting the system metadata via the DataONE API call for the pid " - + id); - if (sysmeta != null) { - ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); - TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); - sysmetaInputStream = - new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); + if (d1Node != null) { + // Metacat can't find the system metadata from the storage system. 
+ // So try to get it from the dataone api + SystemMetadata sysmeta = null; + Identifier identifier = new Identifier(); + identifier.setValue(id); + sysmeta = d1Node.getSystemMetadata(session, identifier); + logger.debug("Finish getting the system metadata via the DataONE API call for the pid " + + id); + if (sysmeta != null) { + ByteArrayOutputStream systemMetadataOutputStream = new ByteArrayOutputStream(); + TypeMarshaller.marshalTypeToOutputStream(sysmeta, systemMetadataOutputStream); + sysmetaInputStream = + new ByteArrayInputStream(systemMetadataOutputStream.toByteArray()); + } + long end = System.currentTimeMillis(); + logger.info("Finish getting the system metadata via DataONE API for the pid " + id + + " and it took " + (end - start) + "milliseconds"); } - long end = System.currentTimeMillis(); - logger.info("Finish getting the system metadata via DataONE API for the pid " + id - + " and it took " + (end - start) + "milliseconds"); } return sysmetaInputStream; } @@ -201,7 +188,7 @@ public static void setD1Node(MultipartD1Node node) { * In case the token expired, the method will retrieve the token and create a new d1 node * @throws ServiceFailure */ - private void refreshD1Node() throws ServiceFailure { + private static void refreshD1Node() throws ServiceFailure { //get the token DataONEauthToken = System.getenv(TOKEN_VARIABLE_NAME); if (DataONEauthToken == null || DataONEauthToken.trim().equals("")) { @@ -244,7 +231,7 @@ private void refreshD1Node() throws ServiceFailure { * @param authToken the authentication token * @return the DataONE session */ - private Session createSession(String authToken) { + private static Session createSession(String authToken) { Session session = null; if (authToken == null || authToken.trim().equals("")) { logger.info("ObjectManager.createSession - Creating the public session"); @@ -265,7 +252,7 @@ private Session createSession(String authToken) { * @throws ClientSideException * @throws IOException */ - private MultipartD1Node 
getMultipartD1Node(Session session, String serviceUrl) throws IOException, ClientSideException { + private static MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) throws IOException, ClientSideException { MultipartRestClient mrc = null; MultipartD1Node d1Node = null; // First create a default HTTP client @@ -288,7 +275,7 @@ private MultipartD1Node getMultipartD1Node(Session session, String serviceUrl) t * @param nodeStr either a DataONE node serviceURL (e.g. https://knb.ecoinformatics.org/knb/d1/mn) * or a DataONE node identifier (e.g. urn:node:CN) */ - private Boolean isCN(String nodeStr) { + private static Boolean isCN(String nodeStr) { Boolean isCN = false; // match node urn, e.g. "https://cn.dataone.org/cn" if (nodeStr.matches("^\\s*urn:node:.*")) { diff --git a/src/main/java/org/dataone/indexer/storage/Storage.java b/src/main/java/org/dataone/indexer/storage/Storage.java index 3eee9496..a0ea2176 100644 --- a/src/main/java/org/dataone/indexer/storage/Storage.java +++ b/src/main/java/org/dataone/indexer/storage/Storage.java @@ -26,7 +26,8 @@ public class Storage { instance = new Storage(); } catch (IOException e) { log.error( - "Dataone-indexer cannot initialize the Storage class since " + e.getMessage()); + "Dataone-indexer cannot initialize the Storage class since " + e.getMessage(), e); + System.exit(1); } } @@ -63,9 +64,8 @@ private Storage() throws HashStoreFactoryException, IOException { /** * Get the instance of the class through the singleton pattern * @return the instance of the class - * @throws IOException */ - public static Storage getInstance() throws IOException { + public static Storage getInstance() { return instance; }