|
| 1 | +<?php |
| 2 | +/* |
| 3 | + * This script generates the list of synonym sets (synsets) |
| 4 | + * for polysemous (multi-meaning) words in Russian Wiktionary. |
| 5 | + * Entries are lexicographically ordered. |
| 6 | + * Only Russian entries are exported. |
| 7 | + * |
| 8 | + * The restrictions: |
| 9 | + * - semantic relation - only synonyms, |
| 10 | + * - 2 or more meanings, |
| 11 | + * - single words (without spaces) |
| 12 | + * - Russian language |
| 13 | + * - unique synsets |
| 14 | +
|
| 15 | +Example of a fragment of the generated JSON file: |
| 16 | +{ |
| 17 | + 'word':'шум', |
| 18 | + 'synsets': |
| 19 | + [ |
| 20 | + ['гам', 'гул', 'гвалт', 'грохот'], |
| 21 | + ['шумиха', 'оживление', 'суматоха'], |
| 22 | + ['ссора', 'брань', 'скандал'] |
| 23 | + ] |
| 24 | +} |
| 25 | +... |
| 26 | + */ |
| 27 | + |
| 28 | +require '../../../../vendor/autoload.php'; |
| 29 | + |
| 30 | +use piwidict\Piwidict; |
| 31 | +use piwidict\sql\{TLang, TPage, TPOS, TRelationType}; |
| 32 | +//use piwidict\widget\WForm; |
| 33 | + |
| 34 | +require '../../config_examples.php'; |
| 35 | +require '../../config_password.php'; |
| 36 | + |
include(LIB_DIR."header.php");

// Open the database connection with the credentials loaded from the config files.
Piwidict::setDatabaseConnection($config['hostname'], $config['user_login'], $config['user_password'], $config['dbname']);
$link_db = Piwidict::getDatabaseConnection();

$wikt_lang = "ru"; // Russian language is the main language in ruwikt (Russian Wiktionary)
Piwidict::setWiktLang($wikt_lang);

$lang_id = TLang::getIDByLangCode("ru");
$relation = "synonyms";
$rel_id = TRelationType::getIDByName($relation);

// Despite its name this is used as a LOWER bound: an entry is exported only
// when it has at least this many meanings and at least this many unique synsets.
$max_meaning = 2;

// Wraps a word in single quotes, escaping embedded apostrophes and backslashes
// so that a word such as "д'Артаньян" cannot break the quoting of the record.
$quote = static function ($word) {
    return "'".addcslashes((string) $word, "'\\")."'";
};

// Stop early with a readable message when the output file cannot be created;
// otherwise every following fwrite() would fail on an invalid handle.
$out_file = 'synsets_for_polysemy.txt';
$fh = fopen($out_file, 'w');
if ($fh === false) {
    echo "<p>Cannot open <b>".$out_file."</b> for writing.</p>";
    include(LIB_DIR."footer.php");
    exit(1);
}
fwrite($fh, "{\n");

// All single-word (no spaces) entries of the selected language, one row per
// (page, part of speech) pair, ordered by page title.
$query = "SELECT page_title as first_word, lang_pos.id as lang_pos_id, part_of_speech.name as pos_name
          FROM lang_pos, page, part_of_speech
          WHERE page.id = lang_pos.page_id
            AND lang_pos.pos_id = part_of_speech.id
            AND page_title NOT LIKE '% %'
            AND lang_id = $lang_id
          ORDER BY page_title";

$result_page = $link_db->query_e($query, "Query failed in file <b>".__FILE__."</b>, string <b>".__LINE__."</b>");

while ($row_page = $result_page->fetch_object()) {
    $query = "SELECT id from meaning WHERE meaning.lang_pos_id =".$row_page->lang_pos_id;
    $result_meaning = $link_db->query_e($query, "Query failed in file <b>".__FILE__."</b>, string <b>".__LINE__."</b>");

    // Skip entries that do not have enough meanings to be polysemous.
    if ($link_db->query_count($result_meaning) < $max_meaning) {
        continue;
    }

    // One formatted line per meaning that has at least one single-word synonym.
    $synsets = [];

    while ($row_meaning = $result_meaning->fetch_object()) {
        $query = "SELECT wiki_text.text as relation_word
                  FROM wiki_text, relation
                  WHERE relation.wiki_text_id=wiki_text.id
                    AND wiki_text.text NOT LIKE '% %'
                    AND relation_type_id = $rel_id
                    AND relation.meaning_id = ".$row_meaning->id.
                " ORDER BY wiki_text.text";

        $result_relation = $link_db->query_e($query, "Query failed in file <b>".__FILE__."</b>, string <b>".__LINE__."</b>");

        $synset = [];
        while ($row_relation = $result_relation->fetch_object()) {
            $synset[] = $quote($row_relation->relation_word);
        }

        if ($synset) {
            // NOTE(review): synset lines are written without separating commas,
            // so the file is not strict JSON; confirm with consumers before changing.
            $synsets[] = " [".join(", ", $synset)."]\n";
        }
    }

    // Identical synsets attached to different meanings are exported only once.
    $synsets = array_unique($synsets);
    if (sizeof($synsets) < $max_meaning) {
        continue;
    }

    fwrite($fh, " {\n".
                " 'word':".$quote($row_page->first_word).",\n".
                " 'POS':".$quote($row_page->pos_name).",\n".
                " 'synsets':\n".
                " [\n".
                join("", $synsets).
                " ]\n".
                " }\n");
}

fwrite($fh, "}\n");
fclose($fh);

include(LIB_DIR."footer.php");
?>
<p>done.
0 commit comments