Skip to content

Commit 3af60a3

Browse files
committed
synsets for polysemy
1 parent 6c5a2c9 commit 3af60a3

File tree

1 file changed

+113
-0
lines changed

1 file changed

+113
-0
lines changed
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
<?php
2+
/*
3+
* This script generates the list of synonyms
4+
* for polysemous (multi-meaning) words in Russian Wiktionary.
5+
* Entries are lexicographically ordered.
6+
* Only Russian entries are exported.
7+
*
8+
* The restrictions:
9+
* - semantic relation - only synonyms,
10+
* - 2 or more meanings,
11+
* - single words (without spaces)
12+
* - Russian language
13+
* - unique synsets
14+
15+
Example of a fragment of the generated file (JSON-like; each entry also carries a 'POS' field):
16+
{
17+
'word':'шум',
18+
'synsets':
19+
[
20+
['гам', 'гул', 'гвалт', 'грохот'],
21+
['шумиха', 'оживление', 'суматоха'],
22+
['ссора', 'брань', 'скандал']
23+
]
24+
}
25+
...
26+
*/
27+
28+
require '../../../../vendor/autoload.php';
29+
30+
use piwidict\Piwidict;
31+
use piwidict\sql\{TLang, TPage, TPOS, TRelationType};
32+
//use piwidict\widget\WForm;
33+
34+
require '../../config_examples.php';
35+
require '../../config_password.php';
36+
37+
include(LIB_DIR."header.php");
38+
39+
// $pw = new Piwidict();
40+
// Register the database connection from the $config array (presumably filled
// in by the config files required above) and keep the handle for the queries
// that follow.
Piwidict::setDatabaseConnection(
    $config['hostname'],
    $config['user_login'],
    $config['user_password'],
    $config['dbname']
);
$link_db = Piwidict::getDatabaseConnection();

// Russian is the main language of ruwikt (Russian Wiktionary).
$wikt_lang = 'ru';
Piwidict::setWiktLang($wikt_lang);

// Ids used to filter the SQL queries below: the entry language and the
// semantic relation type to export.
$lang_id  = TLang::getIDByLangCode('ru');
$relation = 'synonyms';
$rel_id   = TRelationType::getIDByName($relation);

// Despite its name this acts as a lower bound: the script compares both the
// number of meanings and the number of synsets against it with ">=".
$max_meaning = 2;
50+
51+
/*
 * Export loop.
 *
 * For every single-word entry (no space in the title) of language $lang_id,
 * in title order:
 *   1. skip the entry unless it has at least $max_meaning meanings;
 *   2. for each meaning, collect its single-word relations of type $rel_id,
 *      sorted alphabetically, as one synset (meanings without any are ignored);
 *   3. drop duplicate synsets and write the entry only if at least
 *      $max_meaning distinct synsets remain.
 *
 * Reads:  $link_db, $lang_id, $rel_id, $max_meaning (set earlier in the script).
 * Writes: synsets_for_polysemy.txt in the current working directory.
 * Throws: RuntimeException when the output file cannot be opened.
 *
 * NOTE(review): the output is JSON-like rather than strict JSON (single
 * quotes, no separators between entries or between synsets). The layout is
 * kept unchanged because the consumers of the file are not visible here.
 */
$out_path = 'synsets_for_polysemy.txt';
$fh = fopen($out_path, 'w');
if ($fh === false) {
    // Stop before querying the database: nothing could be saved anyway.
    throw new \RuntimeException("Cannot open '$out_path' for writing");
}

// Values are emitted between single quotes, so a raw apostrophe or backslash
// inside a word would end the quoted string early and corrupt the entry.
$quote = static function ($text): string {
    return "'" . addcslashes((string) $text, "'\\") . "'";
};

fwrite($fh, "{\n");

$query = "SELECT page_title as first_word, lang_pos.id as lang_pos_id, part_of_speech.name as pos_name
            FROM lang_pos, page, part_of_speech
            WHERE page.id = lang_pos.page_id
              AND lang_pos.pos_id = part_of_speech.id
              AND page_title NOT LIKE '% %'
              AND lang_id = $lang_id
            ORDER BY page_title";
$result_page = $link_db->query_e($query, "Query failed in file <b>".__FILE__."</b>, string <b>".__LINE__."</b>");

while ($row_page = $result_page->fetch_object()) {
    $query = "SELECT id from meaning WHERE meaning.lang_pos_id = " . (int) $row_page->lang_pos_id;
    $result_meaning = $link_db->query_e($query, "Query failed in file <b>".__FILE__."</b>, string <b>".__LINE__."</b>");

    // Polysemy filter: entries with too few meanings can never qualify.
    if ($link_db->query_count($result_meaning) < $max_meaning) {
        continue;
    }

    $synsets = [];
    while ($row_meaning = $result_meaning->fetch_object()) {
        $query = "SELECT wiki_text.text as relation_word
                    FROM wiki_text, relation
                    WHERE relation.wiki_text_id=wiki_text.id
                      AND wiki_text.text NOT LIKE '% %'
                      AND relation_type_id = $rel_id
                      AND relation.meaning_id = " . (int) $row_meaning->id . "
                    ORDER BY wiki_text.text";
        $result_relation = $link_db->query_e($query, "Query failed in file <b>".__FILE__."</b>, string <b>".__LINE__."</b>");

        $synset = [];
        while ($row_relation = $result_relation->fetch_object()) {
            $synset[] = $quote($row_relation->relation_word);
        }

        // A meaning without single-word synonyms contributes no synset.
        if ($synset) {
            $synsets[] = " [" . join(", ", $synset) . "]\n";
        }
    }

    // Two meanings listing exactly the same synonyms count as one synset.
    $synsets = array_unique($synsets);
    if (count($synsets) < $max_meaning) {
        continue;
    }

    fwrite(
        $fh,
        " {\n" .
        " 'word':" . $quote($row_page->first_word) . ",\n" .
        " 'POS':" . $quote($row_page->pos_name) . ",\n" .
        " 'synsets':\n" .
        " [\n" .
        join("", $synsets) .
        " ]\n" .
        " }\n"
    );
}

fwrite($fh, "}\n");
fclose($fh);
110+
111+
include(LIB_DIR."footer.php");
112+
?>
113+
<p>done.

0 commit comments

Comments
 (0)