I am having problem retrieving a utf-8 encoded document using the java API. When I get the document using the Tamino x-plorer it comes back fine. However with the API it is not encoded properly. The xml tag say utf-8 but the characters are not correct. In particular a copyright symbol is coming back as a single byte instead of the 2 byte representation it should be for utf-8
The code I am using to get the document out is as follows:
/**
- GenerateXdomStats.java
-
@author Created by Omnicore CodeGuide
*/
package edu.harvard.hul.ois.oasis;
import com.softwareag.tamino.db.API.connection.;
import org.jdom.;
import java.io.;
import edu.harvard.hul.ois.xml.;
import org.xml.sax.;
import edu.harvard.hul.ois.ted.;
import org.jdom.output.;
import edu.harvard.hul.ois.xdom.;
import java.net.;
import com.softwareag.tamino.db.API.accessor.;
import com.softwareag.tamino.db.API.objectModel.jdom.TJDOMObjectModel;
import com.softwareag.tamino.db.API.response.TResponse;
import com.softwareag.tamino.db.API.objectModel.*;
import org.jdom.xpath.XPath;
public class OasisUnloader {
private static String databaseURI ="";
private static String collection ="";
private static String schemaLocation ="";
private static FileWriter logFile;
/**
* @param args the command line arguments
*/
public static void main(String[] args) throws NumberFormatException,
TServerNotAvailableException, JDOMException, IOException, TConnectionCloseException, TQueryException, TNoSuchXMLObjectException, TIteratorException, TTransactionModeChangeException {
if (args.length != 1) {
System.out.println("usage: java ViaLoader oasisLoader.xml");
System.exit(0);
}
String oasisLoaderConfigFile = args[0];
XPath uniqueIdXpath = null;
if ((new File(oasisLoaderConfigFile)).exists()) {
//If there is an exception before the logfile is created then just dump
//it to standard error.
try {
XmlConfig config = new XmlConfig(oasisLoaderConfigFile);
config.parse();
databaseURI = config.getString("databaseURI");
collection = config.getString("taminoCollection");
schemaLocation = config.getString("schema");
String logFileName = config.getString("logFile");
logFile = new FileWriter(logFileName);
uniqueIdXpathString = config.getString("uniqueIDXpath");
//uniqueIdXpath = XPath.newInstance(uniqueIdXpathString);
uniqueIdXpath = XPath.newInstance("eadheader/eadid");
} catch (IOException e) {
System.err.println(e.getMessage());
e.printStackTrace();
} catch (SAXException e) {
System.err.println(e.getMessage());
e.printStackTrace();
}
}
TConnectionFactory connectionFactory = TConnectionFactory.getInstance();
//Obtain the connection and accessor for querying
TConnection _connection = connectionFactory.newConnection(databaseURI);
TXMLObjectAccessor _accessor = _connection.newXMLObjectAccessor(TAccessLocation.newInstance(collection),
TJDOMObjectModel.getInstance());
_connection.setIsolationLevel(TIsolationLevel.UNPROTECTED) ;
TLocalTransaction _transactionID = _connection.useLocalTransactionMode();
TQuery tQuery = TQuery.newInstance("/ead[eadheader/eadid='ajp00003']");
TResponse _response = _accessor.query(tQuery,5);
TXMLObjectIterator objectIterator = _response.getXMLObjectIterator();
TXMLObject tXmlObject = TXMLObject.newInstance(TJDOMObjectModel.getInstance());
tXmlObject.setEncoding("utf-8");
int itemsProcessed = 0;
Element oasisRecord;
XMLOutputter xmlWriter = new XMLOutputter();
xmlWriter.setEncoding("utf-8");
FileWriter fw;
Element eadidTextNode = null;
String eadid;
String encoding;
while(objectIterator.hasNext()) {
//while(itemsProcessed < 2) {
itemsProcessed++;
System.out.println("Items Processed: " + itemsProcessed);
//oasisRecord = ((Element)objectIterator.next().getElement()).detach();
tXmlObject = objectIterator.next();
encoding = tXmlObject.getEncoding();
tXmlObject.setEncoding("utf-8");
encoding = tXmlObject.getEncoding();
oasisRecord = ((Element)tXmlObject.getElement()).detach();
eadidTextNode = (Element)uniqueIdXpath.selectSingleNode(oasisRecord);
eadid = eadidTextNode.getText();
fw = new FileWriter("/home/oasis/xmlOutput/" + eadid + ".xml");
encoding = fw.getEncoding();
Document doc = new Document(oasisRecord);
xmlWriter.output(doc, fw);
fw.close();
}
_connection.close();
}
}
any insight would be great.