2 jmdict, a frontend to the JMdict file. http://mandrill.fuxx0r.net/jmdict.php
3 Copyright (C) 2004 Florian Bluemel (florian.bluemel@uni-dortmund.de)
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 #include "kana2romaji.h"
29 void utfchar(const string& from, string::size_type pos, string& to) {
30 unsigned first = from[pos];
31 if ((first & 0x80) == 0)
34 string::size_type len = 0;
35 while (first & 0x80) {
39 to = from.substr(pos, len);
44 typedef map<string, string> romaji_map;
102 romaji["ゃ"] = "\1ya";
103 romaji["ゅ"] = "\1yu";
104 romaji["ょ"] = "\1yo";
189 romaji["ヮ"] = "\1wa";
190 romaji["ャ"] = "\1ya";
191 romaji["ュ"] = "\1yu";
192 romaji["ョ"] = "\1yo";
223 // -- double width letters ------
339 string::size_type const pos,
342 // if we encounter something like
344 // remove the \1 together with the character preceding it;
345 // if the \1 is followed by a 'y', remove that as well
348 (pos + 1 < rom.size()
349 && rom[pos + 1] == 'y'
355 void kana2romaji(const string& kana, string& rom) {
357 for (string::size_type pos = 0; pos < kana.size(); ) {
359 utfchar(kana, pos, ch);
360 romaji_map::const_iterator trans = romaji.find(ch);
361 if (trans == romaji.end()) {
364 cout << "Don't know how to translate '" << ch << "' in '" << kana << "' to romaji.\n";
367 rom += trans->second;
370 for (string::size_type pos = 0; pos < rom.size(); ++pos)
371 if (rom[pos] == '\1') {
374 string const pred = rom.substr(pos - 3, 3);
379 remove_quote_1(pos, rom);
386 string const pred = rom.substr(pos - 2, 2);
398 // shorten "ji\1y" to "j"
399 // otherwise remove "\1" and the preceding character
403 (pos + 1 < rom.size()
404 && rom[pos + 1] == 'y'
416 remove_quote_1(pos, rom);
426 else if (rom[pos] == '\2')
428 if(pos + 1 < rom.size())
429 rom[pos] = rom[pos + 1];
436 else if (rom[pos] == '\3')
440 cout << "ー is the first letter of a word. Don't know how to translate this.\n";
445 rom[pos] = rom[pos-1];