{"id":2216,"date":"2020-04-30T15:57:48","date_gmt":"2020-04-30T13:57:48","guid":{"rendered":"https:\/\/www.cjvt.starkmat.si\/tools-and-resources\/text-corpora\/"},"modified":"2025-08-21T11:44:34","modified_gmt":"2025-08-21T09:44:34","slug":"text-corpora","status":"publish","type":"page","link":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/","title":{"rendered":"Text corpora"},"content":{"rendered":"<div class='flex_column_table av-equal-height-column-flextable -flextable' ><div class=\"flex_column av_one_fourth  no_margin flex_column_table_cell av-equal-height-column av-align-middle av-zero-column-padding first  avia-builder-el-0  el_before_av_three_fourth  avia-builder-el-first  \" style='border-radius:0px; '><section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><h2 class=\"tbk__title\">TOOLS AND RESOURCES<\/h2>\n<\/div><\/section><\/div>\n<div class=\"flex_column av_three_fourth  no_margin flex_column_table_cell av-equal-height-column av-align-middle av-zero-column-padding   avia-builder-el-2  el_after_av_one_fourth  el_before_av_one_fourth  \" style='border-radius:0px; '><section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><p>Corpora are electronic collections of authentic texts that were structured according to predefined standards and goals. They include tools for multilayered language data search.<\/p>\n<\/div><\/section><\/div><\/div><!--close column table wrapper. 
Autoclose: 1 -->\n<div class=\"flex_column av_one_fourth  no_margin flex_column_div first  avia-builder-el-4  el_after_av_three_fourth  el_before_av_three_fourth  sticky-stolpec column-top-margin\" style='padding:20px 50px 0px 0px ; border-radius:0px; '><p><div  class='avia-button-wrap avia-button-center  avia-builder-el-5  el_before_av_button_big  avia-builder-el-first ' ><a href='https:\/\/www.cjvt.si\/en\/tools-and-resources\/dictionaries-and-lexicons\/' class='avia-button avia-button-fullwidth   avia-icon_select-no avia-color-theme-color '  style='color:#ffffff; ' ><span class='avia_iconbox_title' >DICTIONARIES AND LEXICONS<\/span><span class='avia_button_background avia-button avia-button-fullwidth avia-color-theme-color-highlight' ><\/span><\/a><\/div><br \/>\n<div  class='avia-button-wrap avia-button-center  avia-builder-el-6  el_after_av_button_big  el_before_av_button_big ' ><a href='https:\/\/www.cjvt.si\/en\/tools-and-resources\/databases\/' class='avia-button avia-button-fullwidth   avia-icon_select-no avia-color-theme-color '  style='color:#ffffff; ' ><span class='avia_iconbox_title' >DATABASES<\/span><span class='avia_button_background avia-button avia-button-fullwidth avia-color-theme-color-highlight' ><\/span><\/a><\/div><br \/>\n<div  class='avia-button-wrap avia-button-center  avia-builder-el-7  el_after_av_button_big  el_before_av_button_big ' ><a href='https:\/\/www.cjvt.si\/en\/tools-and-resources\/language-technologies\/' class='avia-button avia-button-fullwidth   avia-icon_select-no avia-color-theme-color '  style='color:#ffffff; ' ><span class='avia_iconbox_title' >LANGUAGE TECHNOLOGIES<\/span><span class='avia_button_background avia-button avia-button-fullwidth avia-color-theme-color-highlight' ><\/span><\/a><\/div><br \/>\n<div  class='avia-button-wrap avia-button-center  avia-builder-el-8  el_after_av_button_big  el_before_av_button_big ' ><a href='https:\/\/www.cjvt.si\/en\/tools-and-resources\/online-portals-and-interfaces\/' 
class='avia-button avia-button-fullwidth   avia-icon_select-no avia-color-theme-color '  style='color:#ffffff; ' ><span class='avia_iconbox_title' >ONLINE PORTALS AND INTERFACES<\/span><span class='avia_button_background avia-button avia-button-fullwidth avia-color-theme-color-highlight' ><\/span><\/a><\/div><br \/>\n<div  class='avia-button-wrap avia-button-center  avia-builder-el-9  el_after_av_button_big  avia-builder-el-last  gumb-viri' ><a href='https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/' class='avia-button avia-button-fullwidth   avia-icon_select-no avia-color-theme-color-highlight '  style='color:#ffffff; ' ><span class='avia_iconbox_title' >TEXT CORPORA<\/span><span class='avia_button_background avia-button avia-button-fullwidth avia-color-theme-color-highlight' ><\/span><\/a><\/div><\/p><\/div>\n<div class=\"flex_column av_three_fourth  no_margin flex_column_div   avia-builder-el-10  el_after_av_one_fourth  avia-builder-el-last  column-top-margin\" style='padding:20px 0px 0px 0px ; border-radius:0px; '><section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<h3 class=\"tbk__title\">Gigafida 2.0<\/h3>\n<h5 class=\"tbk__title\">Reference corpus of written standard Slovene<\/h5>\n<p lang=\"en-GB\" style=\"margin: 0in; font-family: Open Sans; font-size: 9.75pt; color: #333333;\"><a href=\"https:\/\/viri.cjvt.si\/gigafida\/\">Gigafida 2.0<\/a> is an extensive and thoughtfully composed reference corpus containing 1,134,693,933 words from 38,310 texts which were composed between 1990 and 2018. 
The Gigafida 2.0 corpus is a fundamental data source of modern Slovene used for linguistic research, describing the language (dictionaries, grammars), preparing learning materials, developing a variety of language resources and processes. Unlike the previous editions, the 2.0 version is a corpus of <span style=\"font-weight: bold;\">standard Slovene<\/span>, which means it mainly contains texts that are written in the standard language.<\/p>\n<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"https:\/\/viri.cjvt.si\/gigafida\/\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-727 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/gigafida-logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-12  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\">\u0160olar 3.0<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Corpus of school written products<\/h5>\n<\/div>\n<p><span class=\"ui-provider hb b c d e f g h i j k l m n o p q r s t u v w x y z ab ac ae af ag ah ai aj ak\" dir=\"ltr\">\u0160olar 3.0 contains the same 
texts as \u0160olar 2.0, but significant improvements have been made at the level of the format, which is now a specialised XML TEI for corpora with linguistic corrections. Error annotations in some 350 texts have been manually corrected, and newer versions of tools for different levels of linguistic annotation have been used. As a result, the morphosyntactic tags are more reliable and new annotation levels are available, e.g. dependency syntax and named entities. The corpus is available in the CLARIN.SI concordancers, with the <a href=\"https:\/\/www.clarin.si\/noske\/run.cgi\/corp_info?corpname=solar30_orig&amp;struct_attr_stats=1\" target=\"_blank\" rel=\"noopener\">students&#8217; source texts<\/a> and the <a href=\"https:\/\/www.clarin.si\/noske\/run.cgi\/corp_info?corpname=solar30_corr&amp;struct_attr_stats=1\" target=\"_blank\" rel=\"noopener\">teacher-corrected texts<\/a> offered separately.<\/span><\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"https:\/\/www.clarin.si\/noske\/run.cgi\/corp_info?corpname=solar30_orig&amp;struct_attr_stats=1\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-726 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/solar_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-14  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 
100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\">\u0160olar 2.0<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Corpus of school texts<\/h5>\n<\/div>\n<p><strong><a href=\"http:\/\/solar.trojina.si\/\" target=\"_blank\" rel=\"noopener noreferrer\">\u0160olar 2.0<\/a><\/strong> is an extensive and thoughtfully composed reference corpus containing 1,134,693,933 words from 38,310 texts which were composed between 1990 and 2018. The Gigafida 2.0 corpus is a fundamental data source of modern Slovene used for linguistic research, describing the language (dictionaries, grammars), preparing learning materials, developing a variety of language resources and processes. Unlike the previous editions, the 2.0 version is a corpus of <span style=\"font-weight: bold;\">standard Slovene<\/span>, which means it mainly contains text that are written in the standard language.<\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"http:\/\/www.korpus-solar.net\/\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-726 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-16  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  
itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<h3 class=\"tbk__title\">Gigafida 1.0<\/h3>\n<h5 class=\"tbk__title\">The reference corpus of written standard Slovene<\/h5>\n<p><strong><a href=\"http:\/\/eng.slovenscina.eu\/korpusi\/gigafida\">Gigafida<\/a><\/strong> is an extensive collection of Slovene text of various genres, from daily newspapers, magazines, all kinds of books (fiction, non-fiction, textbooks), web pages, transcriptions of parliamentary debates and similar. It contains almost 1.2 billion words, or exactly 1,187,002,502 words. The corpus contains texts written between 1990 and 2011. The first version of the corpus was built during the project <a href=\"http:\/\/projekt.slovenscina.eu\/Vsebine\/En\/Domov\/Domov.aspx\">Communication in Slovene<\/a>.<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"https:\/\/www.gigafida.net\/\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-727 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-18  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 
100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\">Kres<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Balanced corpus of modern written Slovene<\/h5>\n<\/div>\n<p lang=\"en-GB\" style=\"margin: 0in;\"><a href=\"https:\/\/www.clarin.si\/ske\/#dashboard?corpname=kres10\" target=\"_blank\" rel=\"noopener\"><span style=\"font-weight: bold; font-family: Open Sans; font-size: 9.75pt;\">Kres <\/span><\/a><span style=\"font-family: Open Sans; font-size: 9.75pt; color: #333333;\">was sampled from the Gigafida corpus and is a balanced corpus that contains almost 100 million words, or exactly 99,831,145 words. Basic sampling units were not entire corpus documents but random paragraphs, which means individual works are represented in a better way. In comparison to Gigafida, the Kres corpus is meant for any type of linguistic inquiries that strive to achieve a reference role that can stem from the corpus sample &#8211; a sample, with a well-thought-out, known and balanced structure.<\/span><\/p>\n<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"https:\/\/www.clarin.si\/ske\/#dashboard?corpname=kres10\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-728 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/kres_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-20  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section 
class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\">Gos<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Corpus of spoken Slovene<\/h5>\n<p><strong><a href=\"http:\/\/eng.slovenscina.eu\/korpusi\/gos\" target=\"_blank\" rel=\"noopener noreferrer\">Gos<\/a><\/strong><span lang=\"en-GB\" style=\"font-weight: bold; font-family: Open Sans; font-size: 9.75pt; color: #333333; background: white;\">\u00a0<\/span><span lang=\"sl\" style=\"font-family: Open Sans; font-size: 9.75pt; color: #333333; background: white;\">includes <\/span><span lang=\"sl\" style=\"font-family: Open Sans; font-size: 9.75pt; color: #333333; background: white;\">the transcripts of approximately 120 hours of speech that we are exposed to on a daily basis in various situations: radio and TV shows, school lessons and lectures, private conversations between friends or within the family, work meetings, consultations, conversations in buying and selling situations, etc. All speech is transcribed in two versions \u2013 with pronunciation-based spelling and with standardized spelling \u2013 and it comprises over one million words. 
The corpus can be searched by means of a <\/span><span style=\"text-decoration: underline;\"><a href=\"http:\/\/www.korpus-gos.net\/\"><span lang=\"sl\" style=\"font-family: 'Open Sans'; font-size: 9.75pt; color: #333333; background: white; text-decoration: underline;\">web concordancer<\/span><\/a><\/span><span lang=\"sl\" style=\"font-family: Open Sans; font-size: 9.75pt; color: #333333; background: white;\"><span style=\"text-decoration: underline;\">;<\/span> furthermore, for all concordances it is possible to listen to the corresponding recordings.<\/span><\/p>\n<\/div>\n<\/div>\n<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"http:\/\/www.korpus-gos.net\/\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-724 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-22  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\">\u0160olar 1.0<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Corpus of school texts<\/h5>\n<p>The <a 
href=\"http:\/\/eng.slovenscina.eu\/korpusi\/solar\">\u0160olar<\/a> corpus includes authentic texts written by Slovene primary and secondary school pupils. It contains one million words or, more exactly, 967,477 words. Based on the concept of foreign language learners\u2019 corpora, it is the first corpus of this type in Slovenia. It was compiled to enable researching the written linguistic capacity of the in-school population and was already used to make language resources, such as <a href=\"http:\/\/eng.slovenscina.eu\/portali\/pedagoski-slovnicni-portal\">The pedagogical grammar portal<\/a><\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"http:\/\/www.korpus-solar.net\/\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-726 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/slovenscina_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-24  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div 
id=\"eluid1d21f740\" class=\"circle-text-box eluid1d21f740 circletitlebox--light element-scheme--light style2\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\">Lektor<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Corpus of copy-edited texts<\/h5>\n<\/div>\n<p><strong><a href=\"http:\/\/ssj.slovenscina.eu\/korpusi\/lektor\">Lektor <\/a><\/strong>is an extensive collection of copyrighted texts and translations and is intended for anyone who is interested in the process of copyediting. This type of corpus enables us to see the most frequent language errors in Slovene (excluding preferential and stylistic corrections). It includes modern non-literary, mostly technical and popular-science texts which were all written by different authors and corrected by different copyeditors. It contains 30,258 copyedits which are divided into 5 main categories (style, morphology, orthography, syntax and pragmatics) and 50 subcategories.<\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"http:\/\/www.korpus-lektor.net\/\" target=\"_blank\" rel=\"noopener noreferrer\"><img decoding=\"async\" class=\"aligncenter wp-image-725 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/lektor_logo-01.svg\" alt=\"\" width=\"200\" height=\"100\" \/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><br \/>\n<div  style='height:30px' class='hr hr-invisible   avia-builder-el-26  el_after_av_textblock  el_before_av_textblock '><span class='hr-inner ' ><span class='hr-inner-style'><\/span><\/span><\/div><br \/>\n<section class=\"av_textblock_section \"  itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock  '   
itemprop=\"text\" ><table style=\"width: 100%;\">\n<tbody>\n<tr>\n<td style=\"background: #fff; border-style: none; width: 67%; padding: 0px 20px 0px 0px;\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div class=\"circle-headline\">\n<div id=\"eluid1d21f740\" class=\"circle-text-box eluid1d21f740 circletitlebox--light element-scheme--light style2\">\n<div class=\"circle-headline\">\n<h3 class=\"wpk-circle-title text-custom\">KoRP<\/h3>\n<h5 class=\"wpk-circle-title text-custom\">Corpus of written texts on public relations<\/h5>\n<\/div>\n<p><a href=\"https:\/\/www.clarin.si\/noske\/sl-spec.cgi\/first?corpname=korp&amp;reload=1&amp;iquery=&amp;queryselector=iqueryrow&amp;lemma=&amp;lpos=&amp;phrase=&amp;word=&amp;wpos=&amp;char=&amp;cql=&amp;default_attr=word&amp;fc_lemword_window_type=both&amp;fc_lemword_wsize=5&amp;fc_lemword=&amp;fc_lemword_type=all&amp;fc_pos_window_type=both&amp;fc_pos_wsize=5&amp;fc_pos_type=all&amp;usesubcorp=&amp;fsca_text.title=&amp;fsca_text.id\"><span style=\"font-family: inherit;\">KoRP <\/span><\/a><span lang=\"sl\" style=\"background: white;\">is <\/span><span lang=\"sl\" style=\"background: white;\">a synchronic monolingual corpus of written texts on public relations. It was compiled at the Faculty of Social Sciences at the University of Ljubljana. 
The corpus contains 1.8 million words from texts published between 1994 and 2007.<\/span><span lang=\"sl\" style=\"background: white;\"> During the <\/span><a href=\"http:\/\/www.termis.fdv.uni-lj.si\/\"><span lang=\"en-GB\" style=\"background: white;\">Termis<\/span><\/a><span lang=\"sl\" style=\"background: white;\"> project, it served as a basis for the terminology data banks for public relations.<\/span><\/p>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n<\/td>\n<td style=\"background: #ffffff; border-style: none; vertical-align: middle; text-align: center;\"><\/td>\n<td style=\"background: #c1c1c1; border-style: none; vertical-align: middle; text-align: center;\">\n<h3><a href=\"https:\/\/www.clarin.si\/noske\/sl-spec.cgi\/first?corpname=korp&amp;reload=1&amp;iquery=&amp;queryselector=iqueryrow&amp;lemma=&amp;lpos=&amp;phrase=&amp;word=&amp;wpos=&amp;char=&amp;cql=&amp;default_attr=word&amp;fc_lemword_window_type=both&amp;fc_lemword_wsize=5&amp;fc_lemword=&amp;fc_lemword_type=all&amp;fc_pos_window_type=both&amp;fc_pos_wsize=5&amp;fc_pos_type=all&amp;usesubcorp=&amp;fsca_text.title=&amp;fsca_text.id\"><img decoding=\"async\" class=\"alignnone wp-image-5406 size-full\" role=\"img\" src=\"https:\/\/www.cjvt.si\/wp-content\/uploads\/2023\/06\/KoRP-logo-white.png\" alt=\"\" width=\"266\" height=\"100\" 
\/><\/a><\/h3>\n<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<\/div><\/section><\/p><\/div>\n","protected":false},"excerpt":{"rendered":"","protected":false},"author":1,"featured_media":0,"parent":2214,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"_acf_changed":false,"_relevanssi_hide_post":"","_relevanssi_hide_content":"","_relevanssi_pin_for_all":"","_relevanssi_pin_keywords":"","_relevanssi_unpin_keywords":"","_relevanssi_related_keywords":"","_relevanssi_related_include_ids":"","_relevanssi_related_exclude_ids":"","_relevanssi_related_no_append":"","_relevanssi_related_not_related":"","_relevanssi_related_posts":"","_relevanssi_noindex_reason":"","inline_featured_image":false,"episode_type":"","audio_file":"","podmotor_file_id":"","podmotor_episode_id":"","cover_image":"","cover_image_id":"","duration":"","filesize":"","filesize_raw":"","date_recorded":"","explicit":"","block":"","itunes_episode_number":"","itunes_title":"","itunes_season_number":"","itunes_episode_type":"","footnotes":""},"class_list":["post-2216","page","type-page","status-publish","hentry"],"acf":[],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.6 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>Text corpora - CJVT<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Text corpora - CJVT\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/\" \/>\n<meta property=\"og:site_name\" content=\"CJVT\" \/>\n<meta property=\"article:publisher\" content=\"https:\/\/www.facebook.com\/centerzajezikovnevireintehnologije\" \/>\n<meta property=\"article:modified_time\" 
content=\"2025-08-21T09:44:34+00:00\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data1\" content=\"9 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\\\/\\\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/text-corpora\\\/\",\"url\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/text-corpora\\\/\",\"name\":\"Text corpora - CJVT\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/text-corpora\\\/#primaryimage\"},\"image\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/text-corpora\\\/#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/www.cjvt.si\\\/wp-content\\\/uploads\\\/2020\\\/03\\\/gigafida-logo-01.svg\",\"datePublished\":\"2020-04-30T13:57:48+00:00\",\"dateModified\":\"2025-08-21T09:44:34+00:00\",\"breadcrumb\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/text-corpora\\\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/text-corpora\\\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/text-corpora\\\/#primaryimage\",\"url\":\"https:\\\/\\\/www.cjvt.si\\\/wp-content\\\/uploads\\\/2020\\\/03\\\/gigafida-logo-01.svg\",\"contentUrl\":\"https:\\\/\\\/www.cjvt.si\\\/wp-content\\\/uploads\\\/2020\\\/03\\\/gigafida-logo-01.svg\"},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/text-corpora\\\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/\"},{\"@
type\":\"ListItem\",\"position\":2,\"name\":\"Tools and Resources\",\"item\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/tools-and-resources\\\/\"},{\"@type\":\"ListItem\",\"position\":3,\"name\":\"Text corpora\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#website\",\"url\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/\",\"name\":\"CJVT\",\"description\":\"Center za jezikovne vire in tehnologije\",\"publisher\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#organization\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":\"Organization\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#organization\",\"name\":\"CJVT\",\"url\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/\",\"logo\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#\\\/schema\\\/logo\\\/image\\\/\",\"url\":\"https:\\\/\\\/www.cjvt.si\\\/wp-content\\\/uploads\\\/2020\\\/06\\\/CJVT-logo-red.jpg\",\"contentUrl\":\"https:\\\/\\\/www.cjvt.si\\\/wp-content\\\/uploads\\\/2020\\\/06\\\/CJVT-logo-red.jpg\",\"width\":1300,\"height\":683,\"caption\":\"CJVT\"},\"image\":{\"@id\":\"https:\\\/\\\/www.cjvt.si\\\/en\\\/#\\\/schema\\\/logo\\\/image\\\/\"},\"sameAs\":[\"https:\\\/\\\/www.facebook.com\\\/centerzajezikovnevireintehnologije\"]}]}<\/script>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"Text corpora - CJVT","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/","og_locale":"en_US","og_type":"article","og_title":"Text corpora - CJVT","og_url":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/","og_site_name":"CJVT","article_publisher":"https:\/\/www.facebook.com\/centerzajezikovnevireintehnologije","article_modified_time":"2025-08-21T09:44:34+00:00","twitter_card":"summary_large_image","twitter_misc":{"Est. reading time":"9 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/","url":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/","name":"Text corpora - CJVT","isPartOf":{"@id":"https:\/\/www.cjvt.si\/en\/#website"},"primaryImageOfPage":{"@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/#primaryimage"},"image":{"@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/#primaryimage"},"thumbnailUrl":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/gigafida-logo-01.svg","datePublished":"2020-04-30T13:57:48+00:00","dateModified":"2025-08-21T09:44:34+00:00","breadcrumb":{"@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpora\/#primaryimage","url":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/gigafida-logo-01.svg","contentUrl":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/03\/gigafida-logo-01.svg"},{"@type":"BreadcrumbList","@id":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/text-corpo
ra\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/www.cjvt.si\/en\/"},{"@type":"ListItem","position":2,"name":"Tools and Resources","item":"https:\/\/www.cjvt.si\/en\/tools-and-resources\/"},{"@type":"ListItem","position":3,"name":"Text corpora"}]},{"@type":"WebSite","@id":"https:\/\/www.cjvt.si\/en\/#website","url":"https:\/\/www.cjvt.si\/en\/","name":"CJVT","description":"Center za jezikovne vire in tehnologije","publisher":{"@id":"https:\/\/www.cjvt.si\/en\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.cjvt.si\/en\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/www.cjvt.si\/en\/#organization","name":"CJVT","url":"https:\/\/www.cjvt.si\/en\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/www.cjvt.si\/en\/#\/schema\/logo\/image\/","url":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/06\/CJVT-logo-red.jpg","contentUrl":"https:\/\/www.cjvt.si\/wp-content\/uploads\/2020\/06\/CJVT-logo-red.jpg","width":1300,"height":683,"caption":"CJVT"},"image":{"@id":"https:\/\/www.cjvt.si\/en\/#\/schema\/logo\/image\/"},"sameAs":["https:\/\/www.facebook.com\/centerzajezikovnevireintehnologije"]}]}},"_links":{"self":[{"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/pages\/2216","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/comments?post=2216"}],"version-history":[{"count":15,"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/pages\/2216\/revisions"}],"predecessor-version":[
{"id":7403,"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/pages\/2216\/revisions\/7403"}],"up":[{"embeddable":true,"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/pages\/2214"}],"wp:attachment":[{"href":"https:\/\/www.cjvt.si\/en\/wp-json\/wp\/v2\/media?parent=2216"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}