2 jmdict, a frontend to the JMdict file. http://mandrill.fuxx0r.net/jmdict.php
3 Copyright (C) 2004 Florian Bluemel (florian.bluemel@uni-dortmund.de)
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License
7 as published by the Free Software Foundation; either version 2
8 of the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
28 #include "xmlparser.h"
29 #include "kana2romaji.h"
// Constructs the importer on top of the database identified by `name`
// (passed straight to the `db` member) and resets the schema: the three
// tables kanji, reading and gloss are dropped and then created again.
//
// NOTE(review): how db.exec reacts when a DROP TABLE targets a table that
// does not exist is not visible in this excerpt -- confirm that a fresh
// database does not make construction fail.
34 Dictionary(const string& name) : db(name) {
35 db.exec("DROP TABLE kanji");
36 db.exec("DROP TABLE reading");
37 db.exec("DROP TABLE gloss");
// One row per kanji spelling of an entry.
38 db.exec("CREATE TABLE kanji (entry INT NOT NULL, kanji TINYTEXT NOT NULL)");
// One row per reading of an entry, stored both as kana and as romaji.
39 db.exec("CREATE TABLE reading (entry INT NOT NULL, kana TINYTEXT NOT NULL, romaji TINYTEXT NOT NULL)");
// One row per gloss, keyed by entry and sense number and tagged with a language.
40 db.exec("CREATE TABLE gloss (entry INT NOT NULL, sense INT NOT NULL, lang TINYTEXT NOT NULL, gloss TEXT NOT NULL)");
// Creates the lookup indices on the three tables: the entry column of each
// table, plus the kana and romaji columns of reading and the gloss column
// of gloss. main() calls this after the "filling database" step, so the
// indices are built once over the already inserted rows.
48 void createIndices() {
49 db.exec("CREATE INDEX k_entry ON kanji (entry)");
50 db.exec("CREATE INDEX r_entry ON reading (entry)");
51 db.exec("CREATE INDEX r_kana ON reading (kana)");
52 db.exec("CREATE INDEX r_romaji ON reading (romaji)");
53 db.exec("CREATE INDEX g_entry ON gloss (entry)");
54 db.exec("CREATE INDEX g_gloss ON gloss (gloss)");
// Handler that receives an xml::Tag by const reference. Its body is not
// visible in this excerpt.
57 void push(const xml::Tag& tag) {
// NOTE(review): the lines below declare a local named `tag`, which cannot
// coexist with push()'s parameter of the same name, so they presumably
// belong to a separate handler (likely the one invoked when a tag is
// closed) whose signature is not shown here -- confirm.
//
// Dispatches on the name of the tag returned by top():
//   ent_seq -> remember the entry number for the rows inserted afterwards
//   keb     -> insert_kanji(text)
//   reb     -> insert_reading(text)
//   sense   -> (action not visible in this excerpt)
//   gloss   -> insert_gloss(value of the "xml:lang" attribute, text)
66 xml::Tag& tag = top();
67 if (tag.name() == "ent_seq") {
// atoi reports no errors: text that does not start with a number yields 0.
68 entry_seq = atoi(tag.text().c_str());
71 else if (tag.name() == "keb")
72 insert_kanji(tag.text());
73 else if (tag.name() == "reb")
74 insert_reading(tag.text());
75 else if (tag.name() == "sense")
77 else if (tag.name() == "gloss")
78 insert_gloss(tag.attribute("xml:lang"), tag.text());
// Inserts one row into the kanji table, pairing the current entry_seq with
// the given kanji spelling.
// NOTE(review): %u / %Q look like printf-style placeholders filled through
// operator%, with %Q presumably quoting and escaping the string -- confirm
// against sql::query.
83 void insert_kanji(const string& kanji) {
84 db.exec(sql::query("INSERT INTO kanji (entry, kanji) VALUES (%u, %Q)") % entry_seq % kanji);
// Inserts one row into the reading table for the current entry_seq, storing
// the reading as given (kana column) together with the value of `romaji`
// after the kana2romaji(reading, romaji) call.
// NOTE(review): the declaration of `romaji` is not visible in this excerpt;
// it is presumably a local string filled by kana2romaji -- confirm.
87 void insert_reading(const string& reading) {
89 kana2romaji(reading, romaji);
90 db.exec(sql::query("INSERT INTO reading (entry, kana, romaji) VALUES (%u, %Q, %Q)") % entry_seq % reading % romaji);
// Inserts one row into the gloss table for the current entry_seq and
// sense_seq, carrying the language tag and the gloss text.
// `lang` is taken by value.
// NOTE(review): the lines between the signature and the query are not
// visible; `lang` may be adjusted there (e.g. a default for an absent
// attribute) -- confirm.
93 void insert_gloss(string lang, const string& text) {
97 sql::query("INSERT INTO gloss (entry, sense, lang, gloss) "
98 "VALUES (%u, %u, %Q, %Q)") % entry_seq % sense_seq % lang % text);
// Function-local counter that persists across calls; every 50000th call
// enters the branch below, whose body is not visible in this excerpt.
100 static unsigned seq = 0;
101 if (++seq % 50000 == 0) {
// Stack of xml::Tag objects.
// NOTE(review): presumably the open-element stack that push() and top()
// operate on -- confirm against the parts of the class not shown here.
107 stack<xml::Tag> tags;
// Command-line entry point: jmdict_import <dictfile> [dest_dir]
//
// Builds a fresh dictionary database from the given dictionary file, prints
// elapsed times for the fill and index steps, and reports any std::exception
// on stderr.
113 int main(int argc, char** argv)
// Exactly one or two arguments are accepted; anything else prints the usage.
115 if(argc < 2 || argc > 3) {
116 cerr << "Usage: jmdict_import <dictfile> [dest_dir]\n";
// Without dest_dir the database lives at DICTIONARY_PATH; with it, dest_dir
// is concatenated directly in front of DICTIONARY_PATH (no separator is
// inserted here).
// NOTE(review): this works only if dest_dir ends with a path separator or
// DICTIONARY_PATH starts with one -- confirm.
120 const string dict_file = argv[1],
121 database_name = argc == 2 ? DICTIONARY_PATH : string(argv[2]) + DICTIONARY_PATH;
// Delete any previous database file; std::remove returns 0 on success, so
// the message is printed only when a file was actually removed.
124 if (std::remove(database_name.c_str()) == 0)
125 std::cout << "removed old dictionary database\n";
127 Dictionary dict(database_name);
128 xml::Parser<Dictionary> parser(dict);
130 ifstream in(dict_file.c_str());
132 cerr << "could not open dictionary file '" << dict_file << "'\n";
135 cout << "filling database... " << flush;
136 time_t start = time(0);
138 cout << time(0) - start << "s" << endl;
139 cout << "creating indices... " << flush;
// NOTE(review): unless `start` is reset in a line not visible here, the
// time printed after createIndices() also includes the fill step -- confirm.
141 dict.createIndices();
142 cout << time(0) - start << "s" << endl;
146 catch (const std::exception& e) {
147 cerr << e.what() << '\n';