2 jmdict, a frontend to the JMdict file. http://mandrill.fuxx0r.net/jmdict.php
3 Copyright (C) 2004 Florian Bluemel (florian.bluemel@uni-dortmund.de)
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 #include "kana2romaji.h"
// Copies the byte sequence that starts at index `pos` of `from` into `to`.
//
// from  source string that is indexed by byte
// pos   byte index of the first byte to inspect
// to    output; overwritten with the extracted substring
//
// NOTE(review): the name and the 0x80 tests suggest this extracts one UTF-8
// encoded character; parts of the body are not visible in this excerpt, so
// confirm against the full function.
29 void utfchar(const string& from, string::size_type pos, string& to) {
// First byte at `pos`. NOTE(review): where plain char is signed, bytes >= 0x80
// are sign-extended by this conversion; the visible tests only look at bit 0x80.
30 unsigned first = from[pos];
// High bit clear: handled by a branch whose body is not visible here
// (presumably the single-byte case -- TODO confirm).
31 if ((first & 0x80) == 0)
// Number of bytes that will be copied into `to` below.
34 string::size_type len = 0;
// Runs while bit 0x80 of `first` is set; the loop body is not visible here
// (presumably it counts the leading 1-bits of the first byte -- TODO confirm).
35 while (first & 0x80) {
// Hand back `len` bytes starting at `pos`.
39 to = from.substr(pos, len);
// Lookup table type used for the kana -> romaji translation: the key is one
// kana character held as a string, the value is its romaji text. Values may
// begin with a control marker such as '\1' that kana2romaji resolves later.
44 typedef map<string, string> romaji_map;
102 romaji["ゃ"] = "\1ya";
103 romaji["ゅ"] = "\1yu";
104 romaji["ょ"] = "\1yo";
189 romaji["ヮ"] = "\1wa";
190 romaji["ャ"] = "\1ya";
191 romaji["ュ"] = "\1yu";
192 romaji["ョ"] = "\1yo";
223 // -- double width letters ------
339 string::size_type const pos,
342 // if we encounter something like
344 // remove the previous character of \1 and the \1
345 // if \1 is followed by an 'y' remove that also
348 (pos + 1 < rom.size()
349 && rom[pos + 1] == 'y'
// Translates the kana text in `kana` to romaji and appends the result to `rom`.
//
// kana  input text; it is split into characters with utfchar()
// rom   output string; table entries are appended to it and then edited in place
//
// Problems (unknown characters, markers that cannot be resolved) are reported
// on cout. NOTE(review): whether `rom` is cleared before use and what the
// function does after reporting an error are not visible in this excerpt.
355 void kana2romaji(const string& kana, string& rom) {
// Pass 1: walk `kana` character by character and append each character's
// table entry to `rom`. The loop header has no increment; `pos` is advanced
// by code that is not visible here.
357 for (string::size_type pos = 0; pos < kana.size(); ) {
// `ch` receives the character that starts at `pos`.
359 utfchar(kana, pos, ch);
360 romaji_map::const_iterator trans = romaji.find(ch);
// Character has no entry in the table: report it.
361 if (trans == romaji.end()) {
364 cout << "Don't know how to translate '" << ch << "' in '" << kana << "' to romaji.\n";
367 rom += trans->second;
// Pass 2: resolve the control markers '\1', '\2' and '\3' that the table
// entries left in `rom`.
370 for (string::size_type pos = 0; pos < rom.size(); ++pos)
// '\1' is the prefix of the small ya/yu/yo/wa table entries (e.g. "\1ya").
371 if (rom[pos] == '\1') {
// The three characters in front of the marker.
// NOTE(review): `pos` is unsigned, so `pos - 3` wraps when pos < 3 and substr
// would then throw std::out_of_range; any guard is not visible here -- confirm.
374 string const pred = rom.substr(pos - 3, 3);
381 remove_quote_1(pos, rom);
// The two characters in front of the marker.
// NOTE(review): same wrap-around concern as above when pos < 2 -- confirm.
388 string const pred = rom.substr(pos - 2, 2);
400 // shorten "ji\1y" to "j"
401 // otherwise remove "\1" and the preceding character
405 (pos + 1 < rom.size()
406 && rom[pos + 1] == 'y'
419 remove_quote_1(pos, rom);
// The single character in front of the marker.
// NOTE(review): reads rom[pos - 1]; requires pos > 0 -- confirm a guard exists.
451 char const pred = rom[pos - 1];
464 cout << "Encountered a special character in " << kana << " but don't know what to do with it.\n";
// '\2' marker (per the comment below, it stands for a small tsu).
473 else if (rom[pos] == '\2')
475 // two tsu may follow each other, so just remove them
// When the next character is not another '\2', overwrite the marker with a
// copy of that next character, so that character appears twice in a row.
476 if(pos + 1 < rom.size() && rom[pos + 1] != '\2')
477 rom[pos] = rom[pos + 1];
// '\3' marker (per the diagnostic below, it stands for the long-vowel mark).
484 else if (rom[pos] == '\3')
492 cout << "ー is the first letter of " << kana << ". Don't know how to translate this.\n";
// Overwrite the marker with a copy of the character in front of it.
498 rom[pos] = rom[pos-1];
// Final pass over `rom`; the condition it checks is not visible here, but a
// failure is reported for the whole input.
501 for (string::size_type pos = 0; pos < rom.size(); ++pos)
507 cout << "Failed to translate " << kana << '\n';