2 jmdict, a frontend to the JMdict file. http://mandrill.fuxx0r.net/jmdict.php
3 Copyright (C) 2004 Florian Bluemel (florian.bluemel@uni-dortmund.de)
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 #include "kana2romaji.h"
29 void utfchar(const string& from, string::size_type pos, string& to) {
30 unsigned first = from[pos];
31 if ((first & 0x80) == 0)
34 string::size_type len = 0;
35 while (first & 0x80) {
39 to = from.substr(pos, len);
44 typedef map<string, string> romaji_map;
102 romaji["ゃ"] = "\1ya";
103 romaji["ゅ"] = "\1yu";
104 romaji["ょ"] = "\1yo";
189 romaji["ヮ"] = "\1wa";
190 romaji["ャ"] = "\1ya";
191 romaji["ュ"] = "\1yu";
192 romaji["ョ"] = "\1yo";
223 // -- double width letters ------
295 romaji["'"] = "'"; // TODO:
326 // not sure which group these belong to
328 romaji["、"] = ","; // TODO:
333 romaji["・"] = "-"; // FIXME
337 string::size_type const pos,
340 // if we encounter something like
342 // remove the \1 and the character preceding it
343 // if the \1 is followed by a 'y', remove that as well
346 (pos + 1 < rom.size()
347 && rom[pos + 1] == 'y'
353 void kana2romaji(const string& kana, string& rom) {
355 for (string::size_type pos = 0; pos < kana.size(); ) {
357 utfchar(kana, pos, ch);
358 romaji_map::const_iterator trans = romaji.find(ch);
359 if (trans == romaji.end()) {
362 cout << "Don't know how to translate '" << ch << "' in '" << kana << "' to romaji." << endl;
365 rom += trans->second;
368 for (string::size_type pos = 0; pos < rom.size(); ++pos)
369 if (rom[pos] == '\1') {
372 string const pred = rom.substr(pos - 3, 3);
377 remove_quote_1(pos, rom);
383 string const pred = rom.substr(pos - 2, 2);
394 // shorten "ji\1y" to "j"
395 // otherwise remove "\1" and the preceding character
399 (pos + 1 < rom.size()
400 && rom[pos + 1] == 'y'
413 remove_quote_1(pos, rom);
419 else if (rom[pos] == '\2' && pos + 1 < rom.size())
420 rom[pos] = rom[pos + 1];