M7350/base/tools/localize/XLIFFFile.cpp
2024-09-09 08:52:07 +00:00

611 lines
21 KiB
C++

#include "XLIFFFile.h"
#include <algorithm>
#include <sys/time.h>
#include <time.h>
#include <cstdio>
const char* const XLIFF_XMLNS = "urn:oasis:names:tc:xliff:document:1.2";
const char *const NS_MAP[] = {
"", XLIFF_XMLNS,
"xml", XMLNS_XMLNS,
NULL, NULL
};
const XMLNamespaceMap XLIFF_NAMESPACES(NS_MAP);
int
XLIFFFile::File::Compare(const XLIFFFile::File& that) const
{
if (filename != that.filename) {
return filename < that.filename ? -1 : 1;
}
return 0;
}
// =====================================================================================
XLIFFFile::XLIFFFile()
{
}
XLIFFFile::~XLIFFFile()
{
}
static XMLNode*
get_unique_node(const XMLNode* parent, const string& ns, const string& name, bool required)
{
size_t count = parent->CountElementsByName(ns, name);
if (count == 1) {
return parent->GetElementByNameAt(ns, name, 0);
} else {
if (required) {
SourcePos pos = count == 0
? parent->Position()
: parent->GetElementByNameAt(XLIFF_XMLNS, name, 1)->Position();
pos.Error("<%s> elements must contain exactly one <%s> element",
parent->Name().c_str(), name.c_str());
}
return NULL;
}
}
XLIFFFile*
XLIFFFile::Parse(const string& filename)
{
XLIFFFile* result = new XLIFFFile();
XMLNode* root = NodeHandler::ParseFile(filename, XMLNode::PRETTY);
if (root == NULL) {
return NULL;
}
// <file>
vector<XMLNode*> files = root->GetElementsByName(XLIFF_XMLNS, "file");
for (size_t i=0; i<files.size(); i++) {
XMLNode* file = files[i];
string datatype = file->GetAttribute("", "datatype", "");
string originalFile = file->GetAttribute("", "original", "");
Configuration sourceConfig;
sourceConfig.locale = file->GetAttribute("", "source-language", "");
result->m_sourceConfig = sourceConfig;
Configuration targetConfig;
targetConfig.locale = file->GetAttribute("", "target-language", "");
result->m_targetConfig = targetConfig;
result->m_currentVersion = file->GetAttribute("", "build-num", "");
result->m_oldVersion = "old";
// <body>
XMLNode* body = get_unique_node(file, XLIFF_XMLNS, "body", true);
if (body == NULL) continue;
// <trans-unit>
vector<XMLNode*> transUnits = body->GetElementsByName(XLIFF_XMLNS, "trans-unit");
for (size_t j=0; j<transUnits.size(); j++) {
XMLNode* transUnit = transUnits[j];
string rawID = transUnit->GetAttribute("", "id", "");
if (rawID == "") {
transUnit->Position().Error("<trans-unit> tag requires an id");
continue;
}
string id;
int index;
if (!StringResource::ParseTypedID(rawID, &id, &index)) {
transUnit->Position().Error("<trans-unit> has invalid id '%s'\n", rawID.c_str());
continue;
}
// <source>
XMLNode* source = get_unique_node(transUnit, XLIFF_XMLNS, "source", false);
if (source != NULL) {
XMLNode* node = source->Clone();
node->SetPrettyRecursive(XMLNode::EXACT);
result->AddStringResource(StringResource(source->Position(), originalFile,
sourceConfig, id, index, node, CURRENT_VERSION,
result->m_currentVersion));
}
// <target>
XMLNode* target = get_unique_node(transUnit, XLIFF_XMLNS, "target", false);
if (target != NULL) {
XMLNode* node = target->Clone();
node->SetPrettyRecursive(XMLNode::EXACT);
result->AddStringResource(StringResource(target->Position(), originalFile,
targetConfig, id, index, node, CURRENT_VERSION,
result->m_currentVersion));
}
// <alt-trans>
XMLNode* altTrans = get_unique_node(transUnit, XLIFF_XMLNS, "alt-trans", false);
if (altTrans != NULL) {
// <source>
XMLNode* altSource = get_unique_node(altTrans, XLIFF_XMLNS, "source", false);
if (altSource != NULL) {
XMLNode* node = altSource->Clone();
node->SetPrettyRecursive(XMLNode::EXACT);
result->AddStringResource(StringResource(altSource->Position(),
originalFile, sourceConfig, id, index, node, OLD_VERSION,
result->m_oldVersion));
}
// <target>
XMLNode* altTarget = get_unique_node(altTrans, XLIFF_XMLNS, "target", false);
if (altTarget != NULL) {
XMLNode* node = altTarget->Clone();
node->SetPrettyRecursive(XMLNode::EXACT);
result->AddStringResource(StringResource(altTarget->Position(),
originalFile, targetConfig, id, index, node, OLD_VERSION,
result->m_oldVersion));
}
}
}
}
delete root;
return result;
}
XLIFFFile*
XLIFFFile::Create(const Configuration& sourceConfig, const Configuration& targetConfig,
const string& currentVersion)
{
XLIFFFile* result = new XLIFFFile();
result->m_sourceConfig = sourceConfig;
result->m_targetConfig = targetConfig;
result->m_currentVersion = currentVersion;
return result;
}
set<string>
XLIFFFile::Files() const
{
set<string> result;
for (vector<File>::const_iterator f = m_files.begin(); f != m_files.end(); f++) {
result.insert(f->filename);
}
return result;
}
void
XLIFFFile::AddStringResource(const StringResource& str)
{
string id = str.TypedID();
File* f = NULL;
const size_t I = m_files.size();
for (size_t i=0; i<I; i++) {
if (m_files[i].filename == str.file) {
f = &m_files[i];
break;
}
}
if (f == NULL) {
File file;
file.filename = str.file;
m_files.push_back(file);
f = &m_files[I];
}
const size_t J = f->transUnits.size();
TransUnit* g = NULL;
for (size_t j=0; j<J; j++) {
if (f->transUnits[j].id == id) {
g = &f->transUnits[j];
}
}
if (g == NULL) {
TransUnit group;
group.id = id;
f->transUnits.push_back(group);
g = &f->transUnits[J];
}
StringResource* res = find_string_res(*g, str);
if (res == NULL) {
return ;
}
if (res->id != "") {
str.pos.Error("Duplicate string resource: %s", res->id.c_str());
res->pos.Error("Previous definition here");
return ;
}
*res = str;
m_strings.insert(str);
}
void
XLIFFFile::Filter(bool (*func)(const string&,const TransUnit&,void*), void* cookie)
{
const size_t I = m_files.size();
for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
File& file = m_files[i];
const size_t J = file.transUnits.size();
for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
TransUnit& tu = file.transUnits[j];
bool keep = func(file.filename, tu, cookie);
if (!keep) {
if (tu.source.id != "") {
m_strings.erase(tu.source);
}
if (tu.target.id != "") {
m_strings.erase(tu.target);
}
if (tu.altSource.id != "") {
m_strings.erase(tu.altSource);
}
if (tu.altTarget.id != "") {
m_strings.erase(tu.altTarget);
}
file.transUnits.erase(file.transUnits.begin()+j);
}
}
if (file.transUnits.size() == 0) {
m_files.erase(m_files.begin()+i);
}
}
}
void
XLIFFFile::Map(void (*func)(const string&,TransUnit*,void*), void* cookie)
{
const size_t I = m_files.size();
for (size_t i=0; i<I; i++) {
File& file = m_files[i];
const size_t J = file.transUnits.size();
for (size_t j=0; j<J; j++) {
func(file.filename, &(file.transUnits[j]), cookie);
}
}
}
TransUnit*
XLIFFFile::EditTransUnit(const string& filename, const string& id)
{
const size_t I = m_files.size();
for (size_t ix=0, i=I-1; ix<I; ix++, i--) {
File& file = m_files[i];
if (file.filename == filename) {
const size_t J = file.transUnits.size();
for (size_t jx=0, j=J-1; jx<J; jx++, j--) {
TransUnit& tu = file.transUnits[j];
if (tu.id == id) {
return &tu;
}
}
}
}
return NULL;
}
StringResource*
XLIFFFile::find_string_res(TransUnit& g, const StringResource& str)
{
int index;
if (str.version == CURRENT_VERSION) {
index = 0;
}
else if (str.version == OLD_VERSION) {
index = 2;
}
else {
str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
return NULL;
}
if (str.config == m_sourceConfig) {
// index += 0;
}
else if (str.config == m_targetConfig) {
index += 1;
}
else {
str.pos.Error("unknown config for string %s: %s", str.id.c_str(),
str.config.ToString().c_str());
return NULL;
}
switch (index) {
case 0:
return &g.source;
case 1:
return &g.target;
case 2:
return &g.altSource;
case 3:
return &g.altTarget;
}
str.pos.Error("Internal Error %s:%d\n", __FILE__, __LINE__);
return NULL;
}
int
convert_html_to_xliff(const XMLNode* original, const string& name, XMLNode* addTo, int* phID)
{
int err = 0;
if (original->Type() == XMLNode::TEXT) {
addTo->EditChildren().push_back(original->Clone());
return 0;
} else {
string ctype;
if (original->Namespace() == "") {
if (original->Name() == "b") {
ctype = "bold";
}
else if (original->Name() == "i") {
ctype = "italic";
}
else if (original->Name() == "u") {
ctype = "underline";
}
}
if (ctype != "") {
vector<XMLAttribute> attrs;
attrs.push_back(XMLAttribute(XLIFF_XMLNS, "ctype", ctype));
XMLNode* copy = XMLNode::NewElement(original->Position(), XLIFF_XMLNS, "g",
attrs, XMLNode::EXACT);
const vector<XMLNode*>& children = original->Children();
size_t I = children.size();
for (size_t i=0; i<I; i++) {
err |= convert_html_to_xliff(children[i], name, copy, phID);
}
return err;
}
else {
if (original->Namespace() == XLIFF_XMLNS) {
addTo->EditChildren().push_back(original->Clone());
return 0;
} else {
if (original->Namespace() == "") {
// flatten out the tag into ph tags -- but only if there is no namespace
// that's still unsupported because propagating the xmlns attribute is hard.
vector<XMLAttribute> attrs;
char idStr[30];
(*phID)++;
sprintf(idStr, "id-%d", *phID);
attrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", idStr));
if (original->Children().size() == 0) {
XMLNode* ph = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
"ph", attrs, XMLNode::EXACT);
ph->EditChildren().push_back(
XMLNode::NewText(original->Position(),
original->ToString(XLIFF_NAMESPACES),
XMLNode::EXACT));
addTo->EditChildren().push_back(ph);
} else {
XMLNode* begin = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
"bpt", attrs, XMLNode::EXACT);
begin->EditChildren().push_back(
XMLNode::NewText(original->Position(),
original->OpenTagToString(XLIFF_NAMESPACES, XMLNode::EXACT),
XMLNode::EXACT));
XMLNode* end = XMLNode::NewElement(original->Position(), XLIFF_XMLNS,
"ept", attrs, XMLNode::EXACT);
string endText = "</";
endText += original->Name();
endText += ">";
end->EditChildren().push_back(XMLNode::NewText(original->Position(),
endText, XMLNode::EXACT));
addTo->EditChildren().push_back(begin);
const vector<XMLNode*>& children = original->Children();
size_t I = children.size();
for (size_t i=0; i<I; i++) {
err |= convert_html_to_xliff(children[i], name, addTo, phID);
}
addTo->EditChildren().push_back(end);
}
return err;
} else {
original->Position().Error("invalid <%s> element in <%s> tag\n",
original->Name().c_str(), name.c_str());
return 1;
}
}
}
}
}
XMLNode*
create_string_node(const StringResource& str, const string& name)
{
vector<XMLAttribute> attrs;
attrs.push_back(XMLAttribute(XMLNS_XMLNS, "space", "preserve"));
XMLNode* node = XMLNode::NewElement(str.pos, XLIFF_XMLNS, name, attrs, XMLNode::EXACT);
const vector<XMLNode*>& children = str.value->Children();
size_t I = children.size();
int err = 0;
for (size_t i=0; i<I; i++) {
int phID = 0;
err |= convert_html_to_xliff(children[i], name, node, &phID);
}
if (err != 0) {
delete node;
}
return node;
}
static bool
compare_id(const TransUnit& lhs, const TransUnit& rhs)
{
string lid, rid;
int lindex, rindex;
StringResource::ParseTypedID(lhs.id, &lid, &lindex);
StringResource::ParseTypedID(rhs.id, &rid, &rindex);
if (lid < rid) return true;
if (lid == rid && lindex < rindex) return true;
return false;
}
XMLNode*
XLIFFFile::ToXMLNode() const
{
XMLNode* root;
size_t N;
// <xliff>
{
vector<XMLAttribute> attrs;
XLIFF_NAMESPACES.AddToAttributes(&attrs);
attrs.push_back(XMLAttribute(XLIFF_XMLNS, "version", "1.2"));
root = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "xliff", attrs, XMLNode::PRETTY);
}
vector<TransUnit> groups;
// <file>
vector<File> files = m_files;
sort(files.begin(), files.end());
const size_t I = files.size();
for (size_t i=0; i<I; i++) {
const File& file = files[i];
vector<XMLAttribute> fileAttrs;
fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "datatype", "x-android-res"));
fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "original", file.filename));
struct timeval tv;
struct timezone tz;
gettimeofday(&tv, &tz);
fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "date", trim_string(ctime(&tv.tv_sec))));
fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "source-language", m_sourceConfig.locale));
fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "target-language", m_targetConfig.locale));
fileAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "build-num", m_currentVersion));
XMLNode* fileNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "file", fileAttrs,
XMLNode::PRETTY);
root->EditChildren().push_back(fileNode);
// <body>
XMLNode* bodyNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "body",
vector<XMLAttribute>(), XMLNode::PRETTY);
fileNode->EditChildren().push_back(bodyNode);
// <trans-unit>
vector<TransUnit> transUnits = file.transUnits;
sort(transUnits.begin(), transUnits.end(), compare_id);
const size_t J = transUnits.size();
for (size_t j=0; j<J; j++) {
const TransUnit& transUnit = transUnits[j];
vector<XMLAttribute> tuAttrs;
// strings start with string:
tuAttrs.push_back(XMLAttribute(XLIFF_XMLNS, "id", transUnit.id));
XMLNode* transUnitNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "trans-unit",
tuAttrs, XMLNode::PRETTY);
bodyNode->EditChildren().push_back(transUnitNode);
// <extradata>
if (transUnit.source.comment != "") {
vector<XMLAttribute> extradataAttrs;
XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "extradata",
extradataAttrs, XMLNode::EXACT);
transUnitNode->EditChildren().push_back(extraNode);
extraNode->EditChildren().push_back(
XMLNode::NewText(GENERATED_POS, transUnit.source.comment,
XMLNode::PRETTY));
}
// <source>
if (transUnit.source.id != "") {
transUnitNode->EditChildren().push_back(
create_string_node(transUnit.source, "source"));
}
// <target>
if (transUnit.target.id != "") {
transUnitNode->EditChildren().push_back(
create_string_node(transUnit.target, "target"));
}
// <alt-trans>
if (transUnit.altSource.id != "" || transUnit.altTarget.id != ""
|| transUnit.rejectComment != "") {
vector<XMLAttribute> altTransAttrs;
XMLNode* altTransNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS, "alt-trans",
altTransAttrs, XMLNode::PRETTY);
transUnitNode->EditChildren().push_back(altTransNode);
// <extradata>
if (transUnit.rejectComment != "") {
vector<XMLAttribute> extradataAttrs;
XMLNode* extraNode = XMLNode::NewElement(GENERATED_POS, XLIFF_XMLNS,
"extradata", extradataAttrs,
XMLNode::EXACT);
altTransNode->EditChildren().push_back(extraNode);
extraNode->EditChildren().push_back(
XMLNode::NewText(GENERATED_POS, transUnit.rejectComment,
XMLNode::PRETTY));
}
// <source>
if (transUnit.altSource.id != "") {
altTransNode->EditChildren().push_back(
create_string_node(transUnit.altSource, "source"));
}
// <target>
if (transUnit.altTarget.id != "") {
altTransNode->EditChildren().push_back(
create_string_node(transUnit.altTarget, "target"));
}
}
}
}
return root;
}
string
XLIFFFile::ToString() const
{
XMLNode* xml = ToXMLNode();
string s = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
s += xml->ToString(XLIFF_NAMESPACES);
delete xml;
s += '\n';
return s;
}
Stats
XLIFFFile::GetStats(const string& config) const
{
Stats stat;
stat.config = config;
stat.files = m_files.size();
stat.toBeTranslated = 0;
stat.noComments = 0;
for (vector<File>::const_iterator file=m_files.begin(); file!=m_files.end(); file++) {
stat.toBeTranslated += file->transUnits.size();
for (vector<TransUnit>::const_iterator tu=file->transUnits.begin();
tu!=file->transUnits.end(); tu++) {
if (tu->source.comment == "") {
stat.noComments++;
}
}
}
stat.totalStrings = stat.toBeTranslated;
return stat;
}