{"id":2219,"date":"2020-04-30T17:13:50","date_gmt":"2020-04-30T15:13:50","guid":{"rendered":"https:\/\/www.cjvt.starkmat.si\/tools-and-resources\/language-technologies\/"},"modified":"2024-08-20T17:00:43","modified_gmt":"2024-08-20T15:00:43","slug":"language-technologies","status":"publish","type":"page","link":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/","title":{"rendered":"Language technologies"},"content":{"rendered":"<div class='flex_column_table av-equal-height-column-flextable -flextable' ><div class=\"flex_column av_one_fourth  no_margin flex_column_table_cell av-equal-height-column av-align-middle first  avia-builder-el-0  el_before_av_three_fourth  avia-builder-el-first  \" style='padding:0px 0px 0px 0px ; border-radius:0px; '><section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><h2 class=\"tbk__title\">TOOLS AND RESOURCES<\/h2>\n<\/div><\/section><\/div>\n<div class=\"flex_column av_three_fourth  no_margin flex_column_table_cell av-equal-height-column av-align-middle av-zero-column-padding   avia-builder-el-2  el_after_av_one_fourth  el_before_av_one_fourth  \" style='border-radius:0px; '><section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><p>Language technologies include software, online services and databases.<\/p>\n<\/div><\/section><\/div><\/div><!--close column table wrapper. 
Autoclose: 1 -->\n<div class=\"flex_column av_one_fourth  no_margin flex_column_div first  avia-builder-el-4  el_after_av_three_fourth  el_before_av_three_fourth  sticky-stolpec column-top-margin\" style='padding:20px 50px 0px 0px ; border-radius:0px; '><p><div  class='avia-button-wrap avia-button-center  avia-builder-el-5  el_before_av_button_big  avia-builder-el-first ' ><a href='https:\/\/www.cjvt.si\/en\/tools-and-resources\/dictionaries-and-lexicons\/' class='avia-button avia-button-fullwidth   avia-icon_select-no avia-color-theme-color '  style='color:#ffffff; ' ><span class='avia_iconbox_title' >DICTIONARIES AND LEXICONS<\/span><span class='avia_button_background avia-button avia-button-fullwidth avia-color-theme-color-highlight' ><\/span><\/a><\/div><br \/>\n<div  class='avia-button-wrap avia-button-center  avia-builder-el-6  el_after_av_button_big  el_before_av_button_big ' ><a href='https:\/\/www.cjvt.si\/en\/tools-and-resources\/databases\/' class='avia-button avia-button-fullwidth   avia-icon_select-no avia-color-theme-color '  style='color:#ffffff; ' ><span class='avia_iconbox_title' >DATABASES<\/span><span class='avia_button_background avia-button avia-button-fullwidth avia-color-theme-color-highlight' ><\/span><\/a><\/div><br \/>\n<div  class='avia-button-wrap avia-button-center  avia-builder-el-7  el_after_av_button_big  el_before_av_button_big ' ><a href='https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/' class='avia-button avia-button-fullwidth   avia-icon_select-no avia-color-theme-color-highlight '  style='color:#ffffff; ' ><span class='avia_iconbox_title' >LANGUAGE TECHNOLOGIES<\/span><span class='avia_button_background avia-button avia-button-fullwidth avia-color-theme-color-highlight' ><\/span><\/a><\/div><br \/>\n<div  class='avia-button-wrap avia-button-center  avia-builder-el-8  el_after_av_button_big  el_before_av_button_big ' ><a href='https:\/\/www.cjvt.si\/en\/tools-and-resources\/online-portals-and-interfaces\/' 
class='avia-button avia-button-fullwidth   avia-icon_select-no avia-color-theme-color '  style='color:#ffffff; ' ><span class='avia_iconbox_title' >ONLINE PORTALS AND INTERFACES<\/span><span class='avia_button_background avia-button avia-button-fullwidth avia-color-theme-color-highlight' ><\/span><\/a><\/div><br \/>\n<div  class='avia-button-wrap avia-button-center  avia-builder-el-9  el_after_av_button_big  avia-builder-el-last  gumb-viri' ><a href='https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/' class='avia-button avia-button-fullwidth   avia-icon_select-no avia-color-theme-color '  style='color:#ffffff; ' ><span class='avia_iconbox_title' >TEXT CORPORA<\/span><span class='avia_button_background avia-button avia-button-fullwidth avia-color-theme-color-highlight' ><\/span><\/a><\/div><\/p><\/div>\n<div class=\"flex_column av_three_fourth  no_margin flex_column_div   avia-builder-el-10  el_after_av_one_fourth  avia-builder-el-last  column-top-margin\" style='padding:20px 0px 0px 0px ; border-radius:0px; '><section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\"><span class=\"ui-provider gs b c d e f g h i j k l m n o p q r s t u v w x y z ab ac ae af ag ah ai aj ak\" dir=\"ltr\">Senta<\/span><\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Online tool for sentence simplification and analysis<\/h5>\n<\/div>\n<p><span class=\"ui-provider gs b c d e f g h i j k l m n o p q r s t u v w x y z ab ac ae af ag ah ai aj ak\" dir=\"ltr\">Senta assesses the complexity of each sentence and simplifies the complex ones, leaving the simple ones unchanged. 
For both the original and the simplified text, the main features are analysed, including the proportion of words that are not in the reference list or textbook. In Senta, the simplified text can be edited in either display mode and can be copied for use outside the application.<\/span><\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"https:\/\/senta.cjvt.si\/en\"><img decoding=\"async\" class=\"alignnone wp-image-6369 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2024\/08\/Senta.png\" alt=\"\" width=\"518\" height=\"211\" srcset=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2024\/08\/Senta.png 518w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2024\/08\/Senta-300x122.png 300w\" sizes=\"(max-width: 518px) 100vw, 518px\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\"><span class=\"ui-provider gs b c d e f g h i j k l m n o p q r s t u v w x y z ab ac ae af ag ah ai aj ak\" dir=\"ltr\">Vejice<\/span><\/h3>\n<h5 class=\"wpk-circle-title text-custom\"><span class=\"ui-provider gs b c d e f g h i j k l m n o p q r s t u v w x y z ab ac ae af ag ah ai aj ak\" dir=\"ltr\">Web tool for automatic comma placement<\/span><\/h5>\n<\/div>\n<p><span class=\"ui-provider gs b c d e f g h i j k l m n o p q r s t u v w x y z ab ac ae af ag ah ai aj ak\" dir=\"ltr\">Use the Vejice (eng. 
Commas) tool to paste text of up to 3,000 characters into a box and press the red arrow. The tool then marks missing commas in grey and redundant commas in blue. It is designed to help with comma placing and is not a substitute for proofreading. According to tests, the software currently gives correct solutions in 94% of cases.<\/span><\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"https:\/\/orodja.cjvt.si\/vejice\/#\"><img decoding=\"async\" class=\"alignnone wp-image-3940 size-medium\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/CJVT_Vejice_WHITE_1.0-01-\u2013-2000x1000-1-300x150.png\" alt=\"\" width=\"300\" height=\"150\" srcset=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/CJVT_Vejice_WHITE_1.0-01-\u2013-2000x1000-1-300x150.png 300w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/CJVT_Vejice_WHITE_1.0-01-\u2013-2000x1000-1-1030x515.png 1030w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/CJVT_Vejice_WHITE_1.0-01-\u2013-2000x1000-1-768x384.png 768w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/CJVT_Vejice_WHITE_1.0-01-\u2013-2000x1000-1-1536x768.png 1536w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/CJVT_Vejice_WHITE_1.0-01-\u2013-2000x1000-1-1500x750.png 1500w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/CJVT_Vejice_WHITE_1.0-01-\u2013-2000x1000-1-705x353.png 705w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/CJVT_Vejice_WHITE_1.0-01-\u2013-2000x1000-1.png 2000w\" sizes=\"(max-width: 300px) 100vw, 300px\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-13  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br 
\/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\"><span class=\"ui-provider gs b c d e f g h i j k l m n o p q r s t u v w x y z ab ac ae af ag ah ai aj ak\" dir=\"ltr\">Berljivost<\/span><\/h3>\n<h5 class=\"wpk-circle-title text-custom\"><span class=\"ui-provider gs b c d e f g h i j k l m n o p q r s t u v w x y z ab ac ae af ag ah ai aj ak\" dir=\"ltr\">Application for assessing the readability of texts in Slovene<\/span><\/h5>\n<\/div>\n<p><span class=\"ui-provider gs b c d e f g h i j k l m n o p q r s t u v w x y z ab ac ae af ag ah ai aj ak\" dir=\"ltr\">The Quality of Slovene Textbooks (KaU\u010d) project has created the first application for assessing the readability of Slovene texts. It allows you to check the readability of texts of up to 5,000 characters. It will alert you to word-level problems: long, rare and repetitive words, abbreviations and vocabulary not found in textbooks. It will also mark long sentences and sentences with many or no verbs. 
It will also provide you with several different statistics and readability measures.<\/span><\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"https:\/\/orodja.cjvt.si\/berljivost\/\"><img decoding=\"async\" class=\"alignnone wp-image-3945 size-medium\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/KaUc_500-750-transparent-screen-300x150.png\" alt=\"\" width=\"300\" height=\"150\" srcset=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/KaUc_500-750-transparent-screen-300x150.png 300w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/KaUc_500-750-transparent-screen-1030x515.png 1030w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/KaUc_500-750-transparent-screen-768x384.png 768w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/KaUc_500-750-transparent-screen-705x353.png 705w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2021\/04\/KaUc_500-750-transparent-screen.png 1500w\" sizes=\"(max-width: 300px) 100vw, 300px\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-15  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\">Obeliks<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Statistical tagger for Slovene<\/h5>\n<\/div>\n<p>A tagger is a computer program which segments any text into units 
and lets us assign specific information to individual words, i.e. parts of speech, grammatical properties (gender, case, number, etc.) or enables us to assign its basic form in the case it has several inflected forms. The tagger can be tested <a href=\"http:\/\/oznacevalnik.slovenscina.eu\/\">here<\/a>.<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"http:\/\/www.slovenscina.eu\/tehnologije\/oznacevalnik\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-735 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-17  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<h3 class=\"circle-headline\">CJVT SVALA<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">A tool for the creation of corpora containing linguistic corrections<\/h5>\n<p>The CJVT Svala tool has been developed as a localised and adapted version of the open access Svala tool. It is used for building text corpora with linguistic corrections and similar resources where the alignment of two different versions of a text is of interest. The tool is useful for transcription, pseudonymization, alignment, as well as for marking up language corrections in texts. 
It currently supports tagging according to the systems used by two Slovenian corpora with language corrections: \u0160olar and KOST.<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c2c2c2; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"https:\/\/orodja.cjvt.si\/svala\/\" target=\"_blank\" rel=\"noopener\"><img decoding=\"async\" class=\"alignnone wp-image-5345\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2023\/06\/CJVT-svala-1.0-WHITE-300x68.png\" alt=\"\" width=\"225\" height=\"51\" srcset=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2023\/06\/CJVT-svala-1.0-WHITE-300x68.png 300w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2023\/06\/CJVT-svala-1.0-WHITE-768x175.png 768w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2023\/06\/CJVT-svala-1.0-WHITE-705x160.png 705w, https:\/\/www.cjvt.si\/wp-content\/uploads\/2023\/06\/CJVT-svala-1.0-WHITE.png 883w\" sizes=\"(max-width: 225px) 100vw, 225px\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-19  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<h3 class=\"circle-headline\">Slovenian Training Corpus SUK<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Corpus for training statistical analysers<\/h5>\n<p>SUK is a training corpus that contains manually reviewed linguistic information, which has been added to the source text. 
This data is used for training machine learning algorithms, which build statistical models from it, or to check the correctness of the analysis by rule-based programs. In statistical programs, such a model trained on a corpus is used to analyse new, unknown texts.<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c2c2c2; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"https:\/\/www.clarin.si\/repository\/xmlui\/handle\/11356\/1747\"><img decoding=\"async\" class=\"alignnone wp-image-957 size-medium\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg\" alt=\"\" width=\"300\" height=\"300\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-21  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\">Statistical syntactic parser for Slovene<\/h3>\n<\/div>\n<p>The <a href=\"http:\/\/eng.slovenscina.eu\/tehnologije\/razclenjevalnik\">MSTParser<\/a> is a computer program for determining the grammatical structure of a sentence automatically. This allows us to identify predicates, subjects, objects etc. 
Syntactic parsing also represents one of the basic natural language processing procedures which supports more complex language technologies such as machine translation, information extraction, speech technologies, automatic summarization, question-answering etc.<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"http:\/\/www.slovenscina.eu\/tehnologije\/razclenjevalnik\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-735 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-23  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\">ssj500k<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">A manually annotated training corpus<\/h5>\n<\/div>\n<p>The <strong><a href=\"http:\/\/www.slovenscina.eu\/tehnologije\/ucni-korpus\" target=\"_blank\" rel=\"noopener noreferrer\">ssj500k<\/a><\/strong> is a training corpus containing manually annotated grammatical information. 
This data is used for training computer programs for automatic text analysis which prepare a statistical model or are used to evaluate rule-based analysis programs.<br \/>\nIt contains manually validated information obtained by segmentation, tokenization, lemmatization, morphosyntactic tagging, parsing and named entity recognition.<\/p>\n<\/div>\n<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"http:\/\/www.slovenscina.eu\/tehnologije\/ucni-korpus\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-735 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<h3><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-25  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<h3 class=\"circle-headline\">ccGigafida and ccKres<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Open-access corpora<\/h5>\n<p><a href=\"http:\/\/eng.slovenscina.eu\/korpusi\/proste-zbirke\">ccGigafida and ccKres<\/a> are two sampled subcorpora of the Gigafida corpus and its balanced version, the Kres corpus. 
The ccGigafida corpus contains approximately 9% or 100 million words, taken from the <a href=\"http:\/\/eng.slovenscina.eu\/korpusi\/gigafida\">Gigafida<\/a> corpus. The ccKres contains approximately 9% or 10 million words, taken from the <a href=\"http:\/\/eng.slovenscina.eu\/korpusi\/kres\">Kres<\/a> corpus. The structure of the sample corpora is the same as the structure of their parent corpora. The ccGigafida and ccKres corpora enable in-depth linguistic and computer (language technology) analyses of the Slovene language without any restrictions.<\/p>\n<\/div>\n<\/div>\n<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c2c2c2; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"http:\/\/www.slovenscina.eu\/korpusi\/proste-zbirke\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-735 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-27  el_after_av_textblock  avia-builder-el-last '><span class='hr-inner ' ><span 
class='hr-inner-style'><\/span><\/span><\/div><\/p><\/div>\n","protected":false},"excerpt":{"rendered":"","protected":false},"author":1,"featured_media":0,"parent":2214,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"_acf_changed":false,"_relevanssi_hide_post":"","_relevanssi_hide_content":"","_relevanssi_pin_for_all":"","_relevanssi_pin_keywords":"","_relevanssi_unpin_keywords":"","_relevanssi_related_keywords":"","_relevanssi_related_include_ids":"","_relevanssi_related_exclude_ids":"","_relevanssi_related_no_append":"","_relevanssi_related_not_related":"","_relevanssi_related_posts":"","_relevanssi_noindex_reason":"","inline_featured_image":false,"episode_type":"","audio_file":"","podmotor_file_id":"","podmotor_episode_id":"","cover_image":"","cover_image_id":"","duration":"","filesize":"","filesize_raw":"","date_recorded":"","explicit":"","block":"","itunes_episode_number":"","itunes_title":"","itunes_season_number":"","itunes_episode_type":"","footnotes":""},"class_list":["post-2219","page","type-page","status-publish","hentry"],"acf":[],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.3 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>Language technologies - CJVT<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Language technologies - CJVT\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/\" \/>\n<meta property=\"og:site_name\" content=\"CJVT\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/centerzajezikovnevireintehnologije\" \/>\n<meta 
property=\"article:modified_time\" content=\"2024-08-20T15:00:43+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data1\" content=\"12 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\\\/\\\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/language-technologies\\\/\",\"url\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/language-technologies\\\/\",\"name\":\"Language technologies - CJVT\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/language-technologies\\\/#primaryimage\"},\"image\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/language-technologies\\\/#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/www.cjvt.si\\\/wp-content\\\/uploads\\\/2024\\\/08\\\/Senta.png\",\"datePublished\":\"2020-04-30T15:13:50+00:00\",\"dateModified\":\"2024-08-20T15:00:43+00:00\",\"breadcrumb\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/language-technologies\\\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/language-technologies\\\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/language-technologies\\\/#primaryimage\",\"url\":\"https:\\\/\\\/www.cjvt.si\\\/wp-content\\\/uploads\\\/2024\\\/08\\\/Senta.png\",\"contentUrl\":\"https:\\\/\\\/www.cjvt.si\\\/wp-content\\\/uploads\\\/2024\\\/08\\\/Senta.png\"},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\\\/\\\/www.cjvt
.si\\\/en\\\/tools-and-resources\\\/language-technologies\\\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Tools and Resources\",\"item\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/\"},{\"@type\":\"ListItem\",\"position\":3,\"name\":\"Language technologies\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#website\",\"url\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/\",\"name\":\"CJVT\",\"description\":\"Center za jezikovne vire in tehnologije\",\"publisher\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#organization\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":\"Organization\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#organization\",\"name\":\"CJVT\",\"url\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/\",\"logo\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#\\\/schema\\\/logo\\\/image\\\/\",\"url\":\"https:\\\/\\\/www.cjvt.si\\\/wp-content\\\/uploads\\\/2020\\\/06\\\/CJVT-logo-red.jpg\",\"contentUrl\":\"https:\\\/\\\/www.cjvt.si\\\/wp-content\\\/uploads\\\/2020\\\/06\\\/CJVT-logo-red.jpg\",\"width\":1300,\"height\":683,\"caption\":\"CJVT\"},\"image\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#\\\/schema\\\/logo\\\/image\\\/\"},\"sameAs\":[\"https:\\\/\\\/www.facebook.com\\\/centerzajezikovnevireintehnologije\"]}]}<\/script>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"Language technologies - CJVT","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/","og_locale":"en_US","og_type":"article","og_title":"Language technologies - CJVT","og_url":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/","og_site_name":"CJVT","article_publisher":"https:\/\/www.facebook.com\/centerzajezikovnevireintehnologije","article_modified_time":"2024-08-20T15:00:43+00:00","og_image":[{"url":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg","type":"","width":"","height":""}],"twitter_card":"summary_large_image","twitter_misc":{"Est. reading time":"12 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/","url":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/","name":"Language technologies - 
CJVT","isPartOf":{"@id":"https:\/\/www.cjvt.si\/en\/#website"},"primaryImageOfPage":{"@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/#primaryimage"},"image":{"@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/#primaryimage"},"thumbnailUrl":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2024\/08\/Senta.png","datePublished":"2020-04-30T15:13:50+00:00","dateModified":"2024-08-20T15:00:43+00:00","breadcrumb":{"@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/#primaryimage","url":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2024\/08\/Senta.png","contentUrl":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2024\/08\/Senta.png"},{"@type":"BreadcrumbList","@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/www.cjvt.si\/en\/"},{"@type":"ListItem","position":2,"name":"Tools and Resources","item":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/"},{"@type":"ListItem","position":3,"name":"Language technologies"}]},{"@type":"WebSite","@id":"https:\/\/www.cjvt.si\/en\/#website","url":"https:\/\/www.cjvt.si\/en\/","name":"CJVT","description":"Center za jezikovne vire in 
tehnologije","publisher":{"@id":"https:\/\/www.cjvt.si\/en\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.cjvt.si\/en\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/www.cjvt.si\/en\/#organization","name":"CJVT","url":"https:\/\/www.cjvt.si\/en\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/www.cjvt.si\/en\/#\/schema\/logo\/image\/","url":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/06\/CJVT-logo-red.jpg","contentUrl":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/06\/CJVT-logo-red.jpg","width":1300,"height":683,"caption":"CJVT"},"image":{"@id":"https:\/\/www.cjvt.si\/en\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/centerzajezikovnevireintehnologije"]}]}},"_links":{"self":[{"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/pages\/2219","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/comments?post=2219"}],"version-history":[{"count":13,"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/pages\/2219\/revisions"}],"predecessor-version":[{"id":6371,"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/pages\/2219\/revisions\/6371"}],"up":[{"embeddable":true,"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/pages\/2214"}],"wp:attachment":[{"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/media?parent=2219"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}