Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#251 Fix inconsistent indentation and formatting of code #252

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
693 changes: 351 additions & 342 deletions dkpro-jwpl-api/src/main/java/org/dkpro/jwpl/api/Category.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand All @@ -19,36 +19,35 @@

import java.util.Iterator;


/**
 * An {@link Iterable} over category objects retrieved by {@link Category#getDescendants()}.
 * <p>
 * Each call to {@link #iterator()} creates a new {@link CategoryDescendantsIterator} that is
 * configured with the wiki, buffer size and start category given at construction time.
 */
public class CategoryDescendantsIterable implements Iterable<Category> {

  /** Buffer size used when the caller does not supply one. */
  private static final int DEFAULT_BUFFER_SIZE = 25;

  private final Wikipedia wiki;
  private final Category startCategory;

  /*
   * The size of the page buffer.
   * With bufferSize = 1, a database connection is needed for retrieving a single article.
   * Higher bufferSize gives better performance, but needs memory.
   */
  private final int bufferSize;

  /**
   * Creates an iterable that uses the default buffer size of 25.
   *
   * @param wiki          The wiki handed to every iterator created by this iterable.
   * @param startCategory The category whose descendants are iterated.
   */
  public CategoryDescendantsIterable(Wikipedia wiki, Category startCategory) {
    this(wiki, DEFAULT_BUFFER_SIZE, startCategory);
  }

  /**
   * Creates an iterable with an explicit buffer size.
   *
   * @param wiki          The wiki handed to every iterator created by this iterable.
   * @param bufferSize    The buffer size handed to every iterator created by this iterable.
   * @param startCategory The category whose descendants are iterated.
   */
  public CategoryDescendantsIterable(Wikipedia wiki, int bufferSize, Category startCategory) {
    this.wiki = wiki;
    this.bufferSize = bufferSize;
    this.startCategory = startCategory;
  }

  /**
   * @return A new {@link CategoryDescendantsIterator} for the configured start category.
   */
  @Override
  public Iterator<Category> iterator() {
    return new CategoryDescendantsIterator(wiki, bufferSize, startCategory);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Expand All @@ -28,155 +28,153 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
* An iterator over category objects retrieved by Category.getDescendants()
*
* An {@link Iterator} over category objects retrieved by {@link Category#getDescendants()}.
*/
public class CategoryDescendantsIterator implements Iterator<Category> {

private final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

private final Wikipedia wiki;
private final Wikipedia wiki;

private final CategoryBuffer buffer;
private final CategoryBuffer buffer;

/** Contains all category ids that have not been expanded, yet. */
private final Set<Integer> notExpandedCategories;
/**
* Contains all category ids that have not been expanded, yet.
*/
private final Set<Integer> notExpandedCategories;

/** As we do not inspect the whole graph at once now, we need a way to check whether a node was already expanded, to avoid infinite loops. */
private final Set<Integer> expandedCategoryIds;
/**
* As we do not inspect the whole graph at once now, we need a way to check whether a node was already expanded, to avoid infinite loops.
*/
private final Set<Integer> expandedCategoryIds;

public CategoryDescendantsIterator(Wikipedia wiki, int bufferSize, Category startCategory) {
this.wiki = wiki;
buffer = new CategoryBuffer(bufferSize);
notExpandedCategories = new HashSet<>();
// initialize with children of start category
for (Category catItem : startCategory.getChildren()) {
notExpandedCategories.add(catItem.getPageId());
}
public CategoryDescendantsIterator(Wikipedia wiki, int bufferSize, Category startCategory) {
this.wiki = wiki;
buffer = new CategoryBuffer(bufferSize);
notExpandedCategories = new HashSet<>();
// initialize with children of start category
for (Category catItem : startCategory.getChildren()) {
notExpandedCategories.add(catItem.getPageId());
}

expandedCategoryIds = new HashSet<>();
expandedCategoryIds = new HashSet<>();
}

@Override
public boolean hasNext() {
return buffer.hasNext();
}

@Override
public Category next() {
return buffer.next();
}

@Override
public void remove() {
throw new UnsupportedOperationException();
}

/**
* Buffers categories in a list.
*/
class CategoryBuffer {

private final List<Category> buffer;
private final int maxBufferSize; // the number of pages to be buffered after a query to the database.
private int bufferFillSize; // even a 500 slot buffer can be filled with only 5 elements
private int bufferOffset; // the offset in the buffer
private int dataOffset; // the overall offset in the data

public CategoryBuffer(int bufferSize) {
this.maxBufferSize = bufferSize;
this.buffer = new ArrayList<>();
this.bufferFillSize = 0;
this.bufferOffset = 0;
this.dataOffset = 0;

//TODO test whether this works when zero pages are retrieved
// we can test this here using a unit test that retrieves no descendants!
}

public boolean hasNext(){
return buffer.hasNext();
/**
* If there are elements in the buffer left, then return true.
* If the end of the filled buffer is reached, then try to load new buffer.
*
* @return True, if there are pages left. False otherwise.
*/
public boolean hasNext() {
if (bufferOffset < bufferFillSize) {
return true;
} else {
return this.fillBuffer();
}
}

public Category next(){
return buffer.next();
/**
* @return The next Category or null if no more categories are available.
*/
public Category next() {
// if there are still elements in the buffer, just retrieve the next one
if (bufferOffset < bufferFillSize) {
return this.getBufferElement();
}
// if there are no more elements => try to fill a new buffer
else if (this.fillBuffer()) {
return this.getBufferElement();
} else {
// if it cannot be filled => return null
return null;
}
}

public void remove() {
throw new UnsupportedOperationException();
private Category getBufferElement() {
Category cat = buffer.get(bufferOffset);
bufferOffset++;
dataOffset++;
return cat;
}

/**
* Buffers categories in a list.
*
*
*/
class CategoryBuffer{

private final List<Category> buffer;
private final int maxBufferSize; // the number of pages to be buffered after a query to the database.
private int bufferFillSize; // even a 500 slot buffer can be filled with only 5 elements
private int bufferOffset; // the offset in the buffer
private int dataOffset; // the overall offset in the data

public CategoryBuffer(int bufferSize){
this.maxBufferSize = bufferSize;
this.buffer = new ArrayList<>();
this.bufferFillSize = 0;
this.bufferOffset = 0;
this.dataOffset = 0;

//TODO test whether this works when zero pages are retrieved
// we can test this here using a unit test that retrieves no descendants!
}
private boolean fillBuffer() {

/**
* If there are elements in the buffer left, then return true.
* If the end of the filled buffer is reached, then try to load new buffer.
* @return True, if there are pages left. False otherwise.
*/
public boolean hasNext(){
if (bufferOffset < bufferFillSize) {
return true;
}
else {
return this.fillBuffer();
}
}
// clear the old buffer and all variables regarding the state of the buffer
buffer.clear();
bufferOffset = 0;
bufferFillSize = 0;

/**
*
* @return The next Category or null if no more categories are available.
*/
public Category next(){
// if there are still elements in the buffer, just retrieve the next one
if (bufferOffset < bufferFillSize) {
return this.getBufferElement();
}
// if there are no more elements => try to fill a new buffer
else if (this.fillBuffer()) {
return this.getBufferElement();
}
else {
// if it cannot be filled => return null
return null;
}
}
// add not expanded categories to queue
List<Integer> queue = new LinkedList<>(notExpandedCategories);

private Category getBufferElement() {
Category cat = buffer.get(bufferOffset);
bufferOffset++;
dataOffset++;
return cat;
}
// expand until buffer size is reached
while (!queue.isEmpty() && buffer.size() < maxBufferSize) {
// remove first element from queue
Category currentCat = wiki.getCategory(queue.get(0));
queue.remove(0);

// if the node was not previously expanded
if (!expandedCategoryIds.contains(currentCat.getPageId())) {
buffer.add(currentCat);
notExpandedCategories.remove(currentCat.getPageId());
expandedCategoryIds.add(currentCat.getPageId());

logger.debug("buf: " + buffer.size());
logger.debug("notExp: " + notExpandedCategories);
logger.debug("exp: " + expandedCategoryIds);

private boolean fillBuffer() {

// clear the old buffer and all variables regarding the state of the buffer
buffer.clear();
bufferOffset = 0;
bufferFillSize = 0;

List<Integer> queue = new LinkedList<>();

// add not expanded categories to queue
queue.addAll(notExpandedCategories);

// expand until buffer size is reached
while (!queue.isEmpty() && buffer.size() < maxBufferSize) {
// remove first element from queue
Category currentCat = wiki.getCategory(queue.get(0));
queue.remove(0);

// if the node was not previously expanded
if (!expandedCategoryIds.contains(currentCat.getPageId())) {
buffer.add(currentCat);
notExpandedCategories.remove(currentCat.getPageId());
expandedCategoryIds.add(currentCat.getPageId());

logger.debug("buf: " + buffer.size());
logger.debug("notExp: " + notExpandedCategories);
logger.debug("exp: " + expandedCategoryIds);

for (Category child : currentCat.getChildren()) {
queue.add(child.getPageId());
notExpandedCategories.add(child.getPageId());
}
}
}

if (buffer.size() > 0) {
bufferFillSize = buffer.size();
return true;
}
else {
return false;
}
for (Category child : currentCat.getChildren()) {
queue.add(child.getPageId());
notExpandedCategories.add(child.getPageId());
}
}
}

if (buffer.size() > 0) {
bufferFillSize = buffer.size();
return true;
} else {
return false;
}
}
}
}
Loading