<?xml version="1.0"?>
<!DOCTYPE article
PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.4 20190208//EN"
       "JATS-journalpublishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="1.4" xml:lang="en">
 <front>
  <journal-meta>
   <journal-id journal-id-type="publisher-id">Russian Journal of Biological Physics and Chemisrty</journal-id>
   <journal-title-group>
    <journal-title xml:lang="en">Russian Journal of Biological Physics and Chemistry</journal-title>
    <trans-title-group xml:lang="ru">
     <trans-title>АКТУАЛЬНЫЕ ВОПРОСЫ БИОЛОГИЧЕСКОЙ ФИЗИКИ И ХИМИИ</trans-title>
    </trans-title-group>
   </journal-title-group>
   <issn publication-format="print">2499-9962</issn>
  </journal-meta>
  <article-meta>
   <article-id pub-id-type="publisher-id">83706</article-id>
   <article-id pub-id-type="doi">10.29039/rusjbpc.2023.0640</article-id>
   <article-categories>
    <subj-group subj-group-type="toc-heading" xml:lang="ru">
     <subject>МОДЕЛИРОВАНИЕ В БИОФИЗИКЕ И БИОИНФОРМАТИКА</subject>
    </subj-group>
    <subj-group subj-group-type="toc-heading" xml:lang="en">
     <subject>MODELLING IN BIOPHYSICS AND BIOINFORMATICS</subject>
    </subj-group>
    <subj-group>
     <subject>МОДЕЛИРОВАНИЕ В БИОФИЗИКЕ И БИОИНФОРМАТИКА</subject>
    </subj-group>
   </article-categories>
   <title-group>
    <article-title xml:lang="en">COMPUTATIONAL TOOLS FOR THE DNA TEXT COMPLEXITY ESTIMATES FOR MICROBIAL GENOMES STRUCTURE ANALYSIS</article-title>
    <trans-title-group xml:lang="ru">
     <trans-title>КОМПЬЮТЕРНЫЕ ПРОГРАММЫ ОЦЕНКИ СЛОЖНОСТИ ТЕКСТА ДНК ДЛЯ АНАЛИЗА СТРУКТУРЫ ГЕНОМОВ МИКРООРГАНИЗМОВ</trans-title>
    </trans-title-group>
   </title-group>
   <contrib-group content-type="authors">
    <contrib contrib-type="author">
     <name-alternatives>
      <name xml:lang="ru">
       <surname>Митина</surname>
       <given-names>А. В.</given-names>
      </name>
      <name xml:lang="en">
       <surname>Mitina</surname>
       <given-names>A. V.</given-names>
      </name>
     </name-alternatives>
     <xref ref-type="aff" rid="aff-1"/>
    </contrib>
    <contrib contrib-type="author">
     <name-alternatives>
      <name xml:lang="ru">
       <surname>Орлова</surname>
       <given-names>Н. Г.</given-names>
      </name>
      <name xml:lang="en">
       <surname>Orlova</surname>
       <given-names>N. G.</given-names>
      </name>
     </name-alternatives>
     <xref ref-type="aff" rid="aff-2"/>
    </contrib>
    <contrib contrib-type="author">
     <name-alternatives>
      <name xml:lang="ru">
       <surname>Дергилев</surname>
       <given-names>А. И.</given-names>
      </name>
      <name xml:lang="en">
       <surname>Dergilev</surname>
       <given-names>A. I.</given-names>
      </name>
     </name-alternatives>
     <xref ref-type="aff" rid="aff-3"/>
     <xref ref-type="aff" rid="aff-4"/>
    </contrib>
    <contrib contrib-type="author">
     <name-alternatives>
      <name xml:lang="ru">
       <surname>Орлов</surname>
       <given-names>Юрий Львович</given-names>
      </name>
      <name xml:lang="en">
       <surname>Orlov</surname>
       <given-names>Yuriy L'vovich</given-names>
      </name>
     </name-alternatives>
     <email>orlov@d-health.institute</email>
     <bio xml:lang="ru">
      <p>доктор биологических наук;</p>
     </bio>
     <bio xml:lang="en">
      <p>doctor of sciences in biology;</p>
     </bio>
     <xref ref-type="aff" rid="aff-5"/>
     <xref ref-type="aff" rid="aff-6"/>
     <xref ref-type="aff" rid="aff-7"/>
     <xref ref-type="aff" rid="aff-8"/>
    </contrib>
   </contrib-group>
   <aff-alternatives id="aff-1">
    <aff>
     <institution xml:lang="ru">Первый МГМУ им. И.М. Сеченова Минздрава России (Сеченовский Университет)</institution>
     <city>Москва</city>
     <country>Россия</country>
    </aff>
    <aff>
     <institution xml:lang="en">Sechenov University</institution>
     <city>Moscow</city>
     <country>Russian Federation</country>
    </aff>
   </aff-alternatives>
   <aff-alternatives id="aff-2">
    <aff>
     <institution xml:lang="ru">Финансовый Университет при Правительстве РФ</institution>
     <city>Москва</city>
     <country>Россия</country>
    </aff>
    <aff>
     <institution xml:lang="en">Financial University under the Government of the Russian Federation</institution>
     <city>Moscow</city>
     <country>Russian Federation</country>
    </aff>
   </aff-alternatives>
   <aff-alternatives id="aff-3">
    <aff>
     <institution xml:lang="ru">Новосибирский государственный университет</institution>
     <city>Новосибирск</city>
     <country>Россия</country>
    </aff>
    <aff>
     <institution xml:lang="en">Novosibirsk State University</institution>
     <city>Novosibirsk</city>
     <country>Russian Federation</country>
    </aff>
   </aff-alternatives>
   <aff-alternatives id="aff-4">
    <aff>
     <institution xml:lang="ru">Институт цитологии и генетики СО РАН</institution>
     <city>Новосибирск</city>
     <country>Россия</country>
    </aff>
    <aff>
     <institution xml:lang="en">Institute of Cytology and Genetics SB RAS</institution>
     <city>Novosibirsk</city>
     <country>Russian Federation</country>
    </aff>
   </aff-alternatives>
   <aff-alternatives id="aff-5">
    <aff>
     <institution xml:lang="ru">Первый МГМУ им. И.М. Сеченова Минздрава России (Сеченовский Университет)</institution>
     <city>Москва</city>
     <country>Россия</country>
    </aff>
    <aff>
     <institution xml:lang="en">Sechenov University</institution>
     <city>Moscow</city>
     <country>Russian Federation</country>
    </aff>
   </aff-alternatives>
   <aff-alternatives id="aff-6">
    <aff>
     <institution xml:lang="ru">Институт цитологии и генетики СО РАН</institution>
     <city>Новосибирск</city>
     <country>Россия</country>
    </aff>
    <aff>
     <institution xml:lang="en">Institute of Cytology and Genetics SB RAS</institution>
     <city>Novosibirsk</city>
     <country>Russian Federation</country>
    </aff>
   </aff-alternatives>
   <aff-alternatives id="aff-7">
    <aff>
     <institution xml:lang="ru">Новосибирский государственный университет</institution>
     <city>Новосибирск</city>
     <country>Россия</country>
    </aff>
    <aff>
     <institution xml:lang="en">Novosibirsk State University</institution>
     <city>Novosibirsk</city>
     <country>Russian Federation</country>
    </aff>
   </aff-alternatives>
   <aff-alternatives id="aff-8">
    <aff>
     <institution xml:lang="ru">Российский университет дружбы народов</institution>
    </aff>
    <aff>
     <institution xml:lang="en">Peoples’ Friendship University of Russia</institution>
    </aff>
   </aff-alternatives>
   <pub-date publication-format="print" date-type="pub" iso-8601-date="2024-06-06T08:46:28+03:00">
    <day>06</day>
    <month>06</month>
    <year>2024</year>
   </pub-date>
   <pub-date publication-format="electronic" date-type="pub" iso-8601-date="2024-06-06T08:46:28+03:00">
    <day>06</day>
    <month>06</month>
    <year>2024</year>
   </pub-date>
   <volume>8</volume>
   <issue>4</issue>
   <fpage>408</fpage>
   <lpage>416</lpage>
   <history>
    <date date-type="received" iso-8601-date="2023-08-02T00:00:00+03:00">
     <day>02</day>
     <month>08</month>
     <year>2023</year>
    </date>
   </history>
   <self-uri xlink:href="https://rusjbpc.ru/en/nauka/article/83706/view">https://rusjbpc.ru/en/nauka/article/83706/view</self-uri>
   <abstract xml:lang="ru">
    <p>Одна из классических задач биоинформатики - поиск повторов и статистически неоднородных участков последовательностей ДНК и полных геномов микроорганизмов. Теоретические подходы к исследованию сложности текста последовательностей макромолекул - ДНК, РНК и белков – развивались до появления полных геномных последовательностей и получили новый импульс в связи с распространением технологий массового параллельного секвенирования и бурным ростом доступных данных. Рассматриваются современные компьютерные методы и существующие программы оценки сложности текста ДНК и построения профиля свойств для анализа структуры геномов микроорганизмов. Дан обзор доступных онлайн-программ для поиска и визуализации повторов текста. Представлена собственная компьютерная реализация метода оценки лингвистической сложности текста и сжатия по Лемпелю-Зиву для выявления структурных особенностей и аномалий геномов микроорганизмов. Представлены примеры профилей анализа сложности текста. Рассмотрено применение оценок сложности к анализу последовательности генома коронавируса SARS-CoV-2, последовательности вируса эндемического паротита Mumps Orthorubulavirus. Выявлены участки низкой сложности текста.</p>
   </abstract>
   <trans-abstract xml:lang="en">
    <p>One of the fundamental tasks in bioinformatics involves searching for repeats and statistically heterogeneous segments within DNA sequences and complete genomes of microorganisms. Theoretical approaches to analyzing the complexity of macromolecule sequences (DNA, RNA, and proteins) were established prior to the availability of complete genomic sequences. These approaches have experienced a resurgence due to the proliferation of mass parallel sequencing technologies and the exponential growth of accessible data. This article explores contemporary computer methods and existing programs designed to assess DNA text complexity as well as construct profiles of properties for analysing the genomic structures of microorganisms. The article offers a comprehensive overview of available online programs designed for detecting and visualising repeats within genetic text. Furthermore, the paper introduces a novel computer-based implementation of a method to evaluate the linguistic complexity of text and its compression using Lempel-Ziv. This approach aims to identify structural features and anomalies within the genomes of microorganisms. The article also provides examples of profiles generated through the analysis of text complexity. Application of these complexity estimates in the analysis of genome sequences, such as those of the SARS-CoV-2 coronavirus and the Mumps Orthorubulavirus, is discussed. Specific areas of low complexity within the genetic text have been successfully identified in this research.</p>
   </trans-abstract>
   <kwd-group xml:lang="ru">
    <kwd>биоинформатика</kwd>
    <kwd>биофизические модели</kwd>
    <kwd>сложность текста</kwd>
    <kwd>геномы микроорганизмов</kwd>
   </kwd-group>
   <kwd-group xml:lang="en">
    <kwd>bioinformatics</kwd>
    <kwd>biophysical models</kwd>
    <kwd>text complexity</kwd>
    <kwd>microbial genomes</kwd>
   </kwd-group>
   <funding-group>
    <funding-statement xml:lang="ru">Работа поддержана грантом РНФ (23-44-00030).</funding-statement>
   </funding-group>
  </article-meta>
 </front>
 <body>
  <p></p>
 </body>
 <back>
  <ref-list>
   <ref id="B1">
    <label>1.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Simoes R.P., Wolf I.R., Correa B.A., Valente G.T. Uncovering patterns of the evolution of genomic sequence entropy and complexity. Mol Genet Genomics, 2021, vol. 296, no. 2, pp. 289-298, doi: 10.1007/s00438-020-01729-y.</mixed-citation>
     <mixed-citation xml:lang="en">Simoes R.P., Wolf I.R., Correa B.A., Valente G.T. Uncovering patterns of the evolution of genomic sequence entropy and complexity. Mol Genet Genomics, 2021, vol. 296, no. 2, pp. 289-298, doi: 10.1007/s00438-020-01729-y.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B2">
    <label>2.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Orlov Y.L., Potapov V.N. Complexity: an internet resource for analysis of DNA sequence complexity. Nucleic Acids Res., 2004, vol. 32, pp. W628-W633, doi: 10.1093/nar/gkh466.</mixed-citation>
     <mixed-citation xml:lang="en">Orlov Y.L., Potapov V.N. Complexity: an internet resource for analysis of DNA sequence complexity. Nucleic Acids Res., 2004, vol. 32, pp. W628-W633, doi: 10.1093/nar/gkh466.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B3">
    <label>3.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Bartal A., Jagodnik K.M. Progress in and Opportunities for Applying Information Theory to Computational Biology and Bioinformatics. Entropy (Basel), 2022, vol. 24, no. 7, p. 925, doi: 10.3390/e24070925.</mixed-citation>
     <mixed-citation xml:lang="en">Bartal A., Jagodnik K.M. Progress in and Opportunities for Applying Information Theory to Computational Biology and Bioinformatics. Entropy (Basel), 2022, vol. 24, no. 7, p. 925, doi: 10.3390/e24070925.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B4">
    <label>4.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Bernaola-Galvan P., Carpena P., Gomez-Martin C., Oliver J.L. Compositional Structure of the Genome: A Review. Biology (Basel), 2023, vol. 12, no. 6, p. 849, doi: 10.3390/biology12060849.</mixed-citation>
     <mixed-citation xml:lang="en">Bernaola-Galvan P., Carpena P., Gomez-Martin C., Oliver J.L. Compositional Structure of the Genome: A Review. Biology (Basel), 2023, vol. 12, no. 6, p. 849, doi: 10.3390/biology12060849.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B5">
    <label>5.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Chang C.H., Hsieh L.C., Chen T.Y., Chen H.D., Luo L., Lee H.C. Shannon information in complete genomes. J. Bioinform. Comput. Biol., 2005, vol. 3, no. 3, pp. 587-608, doi: 10.1142/s0219720005001181.</mixed-citation>
     <mixed-citation xml:lang="en">Chang C.H., Hsieh L.C., Chen T.Y., Chen H.D., Luo L., Lee H.C. Shannon information in complete genomes. J. Bioinform. Comput. Biol., 2005, vol. 3, no. 3, pp. 587-608, doi: 10.1142/s0219720005001181.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B6">
    <label>6.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Olson W.K., Zhurkin V.B. Modeling DNA deformations. Curr Opin Struct Biol., 2000, vol. 10, no. 3, pp. 286-297, doi: 10.1016/s0959-440x(00)00086-5.</mixed-citation>
     <mixed-citation xml:lang="en">Olson W.K., Zhurkin V.B. Modeling DNA deformations. Curr Opin Struct Biol., 2000, vol. 10, no. 3, pp. 286-297, doi: 10.1016/s0959-440x(00)00086-5.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B7">
    <label>7.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Orlov Y.L., Filippov V.P., Potapov V.N., Kolchanov N.A. Construction of stochastic context trees for genetic texts. In Silico Biol., 2002, vol. 2, no. 3, pp. 233-247.</mixed-citation>
     <mixed-citation xml:lang="en">Orlov Y.L., Filippov V.P., Potapov V.N., Kolchanov N.A. Construction of stochastic context trees for genetic texts. In Silico Biol., 2002, vol. 2, no. 3, pp. 233-247.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B8">
    <label>8.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Chanda P., Costa E., Hu J., Sukumar S., Van Hemert J., Walia R. Information Theory in Computational Biology: Where We Stand Today. Entropy, 2020, vol. 22, no. 6, p. 627, doi: 10.3390/e22060627.</mixed-citation>
     <mixed-citation xml:lang="en">Chanda P., Costa E., Hu J., Sukumar S., Van Hemert J., Walia R. Information Theory in Computational Biology: Where We Stand Today. Entropy, 2020, vol. 22, no. 6, p. 627, doi: 10.3390/e22060627.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B9">
    <label>9.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Akbari Rokn Abadi S., Mohammadi A., Koohi S. A new profiling approach for DNA sequences based on the nucleotides' physicochemical features for accurate analysis of SARS-CoV-2 genomes. BMC Genomics, 2023, vol. 24, no. 1, p. 266, doi: 10.1186/s12864-023-09373-7.</mixed-citation>
     <mixed-citation xml:lang="en">Akbari Rokn Abadi S., Mohammadi A., Koohi S. A new profiling approach for DNA sequences based on the nucleotides' physicochemical features for accurate analysis of SARS-CoV-2 genomes. BMC Genomics, 2023, vol. 24, no. 1, p. 266, doi: 10.1186/s12864-023-09373-7.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B10">
    <label>10.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Altschul S.F., Madden T.L., Schaffer A.A., Zhang J., Zhang Z., Miller W., Lipman D.J. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res., 1997, vol. 25, no. 17, pp. 3389-3402, doi: 10.1093/nar/25.17.3389.</mixed-citation>
     <mixed-citation xml:lang="en">Altschul S.F., Madden T.L., Schaffer A.A., Zhang J., Zhang Z., Miller W., Lipman D.J. Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. Nucleic Acids Res., 1997, vol. 25, no. 17, pp. 3389-3402, doi: 10.1093/nar/25.17.3389.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B11">
    <label>11.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Berselli M., Lavezzo E., Toppo S. NeSSie: a tool for the identification of approximate DNA sequence symmetries. Bioinformatics, 2018, vol. 34, no. 14, pp. 2503-2505, doi: 10.1093/bioinformatics/bty142.</mixed-citation>
     <mixed-citation xml:lang="en">Berselli M., Lavezzo E., Toppo S. NeSSie: a tool for the identification of approximate DNA sequence symmetries. Bioinformatics, 2018, vol. 34, no. 14, pp. 2503-2505, doi: 10.1093/bioinformatics/bty142.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B12">
    <label>12.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Andersen E.S. Prediction and design of DNA and RNA structures. New Biotechnology, 2010, vol. 27, no. 3, pp. 184-193, doi: 10.1016/j.nbt.2010.02.012.</mixed-citation>
     <mixed-citation xml:lang="en">Andersen E.S. Prediction and design of DNA and RNA structures. New Biotechnology, 2010, vol. 27, no. 3, pp. 184-193, doi: 10.1016/j.nbt.2010.02.012.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B13">
    <label>13.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Shi X., Teng H., Sun Z. An updated overview of experimental and computational approaches to identify non-canonical DNA/RNA structures with emphasis on G-quadruplexes and R-loops. Brief Bioinform., 2022, vol. 23, no. 6, p. bbac441, doi: 10.1093/bib/bbac441.</mixed-citation>
     <mixed-citation xml:lang="en">Shi X., Teng H., Sun Z. An updated overview of experimental and computational approaches to identify non-canonical DNA/RNA structures with emphasis on G-quadruplexes and R-loops. Brief Bioinform., 2022, vol. 23, no. 6, p. bbac441, doi: 10.1093/bib/bbac441.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B14">
    <label>14.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Narad P., Kumar A., Chakraborty A., Patni P., Sengupta A., Wadhwa G., Upadhyaya K.C. Transcription Factor Information System (TFIS): A Tool for Detection of Transcription Factor Binding Sites. Interdiscip Sci., 2017, vol. 9, no. 3, pp. 378-391, doi: 10.1007/s12539-016-0168-5.</mixed-citation>
     <mixed-citation xml:lang="en">Narad P., Kumar A., Chakraborty A., Patni P., Sengupta A., Wadhwa G., Upadhyaya K.C. Transcription Factor Information System (TFIS): A Tool for Detection of Transcription Factor Binding Sites. Interdiscip Sci., 2017, vol. 9, no. 3, pp. 378-391, doi: 10.1007/s12539-016-0168-5.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B15">
    <label>15.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Сафронова Н.С., Пономаренко М.П., Абнизова И.И., Орлова Г.В., Чадаева И.В., Орлов Ю.Л. Фланкирующие повторы мономеров определяют пониженную контекстную сложность сайтов однонуклеотидных полиморфизмов в геноме человека. Вавиловский журнал генетики и селекции, 2015, т. 19, № 6, с. 668-674, doi: 10.18699/VJ15.092.</mixed-citation>
     <mixed-citation xml:lang="en">Safronova N.S., Ponomarenko M.P., Abnizova I.I., Orlova G.V., Chadaeva I.V., Orlov Y.L. Flanking monomer repeats determine decreased context complexity of single nucleotide polymorphism sites in the human genome. Russian Journal of Genetics: Applied Research, 2016, vol. 6, no. 8, pp. 809-815 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B16">
    <label>16.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Vityaev E.E., Orlov Y.L., Vishnevsky O.V., Pozdnyakov M.A., Kolchanov N.A. Computer system &quot;Gene Discovery&quot; for promoter structure analysis. In Silico Biol., 2002, vol. 2, pp. 257-262.</mixed-citation>
     <mixed-citation xml:lang="en">Vityaev E.E., Orlov Y.L., Vishnevsky O.V., Pozdnyakov M.A., Kolchanov N.A. Computer system &quot;Gene Discovery&quot; for promoter structure analysis. In Silico Biol., 2002, vol. 2, pp. 257-262.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B17">
    <label>17.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Babenko V., Chadaeva I., Orlov Y. Genomic landscape of CpG rich elements in human genome. BMC evolutionary biology, 2017, vol. 17, suppl. 1, p. 19, doi: 10.1186/s12862-016-0864-0.</mixed-citation>
     <mixed-citation xml:lang="en">Babenko V., Chadaeva I., Orlov Y. Genomic landscape of CpG rich elements in human genome. BMC evolutionary biology, 2017, vol. 17, suppl. 1, p. 19, doi: 10.1186/s12862-016-0864-0.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B18">
    <label>18.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Babenko V.N., Bogomolov A.G., Babenko R.O., Galieva E.R., Orlov Y.L. CpG islands’ clustering uncovers early development genes in the human genome. Computer Science and Information Systems, 2018, vol. 15, no. 2, pp. 473-485, doi: 10.2298/CSIS170523004B.</mixed-citation>
     <mixed-citation xml:lang="en">Babenko V.N., Bogomolov A.G., Babenko R.O., Galieva E.R., Orlov Y.L. CpG islands’ clustering uncovers early development genes in the human genome. Computer Science and Information Systems, 2018, vol. 15, no. 2, pp. 473-485, doi: 10.2298/CSIS170523004B.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B19">
    <label>19.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Орлов Ю.Л., Левицкий В.Г., Смирнова О.Г., Подколодная О.А., Хлебодарова Т.М., Колчанов Н.А. Статистический анализ последовательностей ДНК, содержащих сайты формирования нуклеосом. Биофизика, 2006, т. 51, с. 608-614.</mixed-citation>
     <mixed-citation xml:lang="en">Orlov Y.L., Levitskii V.G., Smirnova O.G., Podkolodnaya O.A., Khlebodarova T.M., Kolchanov N.A. Statistical analysis of DNA sequences containing nucleosome positioning sites. Biophysics, 2006, vol. 51, no. 4, pp. 541-546 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B20">
    <label>20.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Goh W.S., Orlov Y., Li J., Clarke N.D. Blurring of high-resolution data shows that the effect of intrinsic nucleosome occupancy on transcription factor binding is mostly regional, not local. PLoS Comput Biol., 2010, vol. 6, no. 1, e1000649, doi: 10.1371/journal.pcbi.1000649.</mixed-citation>
     <mixed-citation xml:lang="en">Goh W.S., Orlov Y., Li J., Clarke N.D. Blurring of high-resolution data shows that the effect of intrinsic nucleosome occupancy on transcription factor binding is mostly regional, not local. PLoS Comput Biol., 2010, vol. 6, no. 1, e1000649, doi: 10.1371/journal.pcbi.1000649.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B21">
    <label>21.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Дергилев А.И., Спицина А.М., Чадаева И.В., Свичкарев А.В., Науменко Ф.М., Кулакова Е.В., Витяев Е.Е., Чен М., Орлов Ю.Л. Компьютерный анализ совместной локализации сайтов связывания транскрипционных факторов по данным ChIP-seq. Вавиловский журнал генетики и селекции, 2016, т. 20, № 6, с. 770-778, doi: 10.18699/VJ16.194.</mixed-citation>
     <mixed-citation xml:lang="en">Dergilev A.I., Spitsina A.M., Chadaeva I.V., Svichkarev A.V., Naumenko F.M., Kulakova E.V., Vityaev E.E., Chen M., Orlov Y.L. Computer analysis of colocalization of the TFs’ binding sites in the genome according to the ChIP-seq data. Russian Journal of Genetics: Applied Research, 2017, vol. 7, no. 5, pp. 513-522 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B22">
    <label>22.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Alipanahi B., Delong A., Weirauch M.T., Frey B.J. Predicting the sequence specificities of DNA- and RNA-binding proteins by deep learning. Nat Biotechnol., 2015, vol. 33, no. 8, pp. 831-838, doi: 10.1038/nbt.3300.</mixed-citation>
     <mixed-citation xml:lang="en">Alipanahi B., Delong A., Weirauch M.T., Frey B.J. Predicting the sequence specificities of DNA- and RNA-binding proteins by deep learning. Nat Biotechnol., 2015, vol. 33, no. 8, pp. 831-838, doi: 10.1038/nbt.3300.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B23">
    <label>23.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Митина А.В., Орлов Ю.Л. Оценка лингвистической сложности генетических последовательностей штаммов SARS-CoV-2. Сборник научных трудов VII Съезда Биофизиков России: в 2 томах, том 1 - Краснодар: Типография ФГБОУ ВО «КубГТУ», 2023, с. 330, doi: 10.26297/SbR6.2023.001.</mixed-citation>
     <mixed-citation xml:lang="en">Mitina A.V., Orlov Y.L. The estimates of linguistic complexity of genetic sequences of SARS-CoV-2 strains. Collection of scientific papers of the VII Congress of Biophysicists of Russia: in 2 volumes, vol. 1 - Krasnodar: Printing house of FGBOU VO &quot;KubGTU&quot;, 2023, p. 330 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B24">
    <label>24.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Orlov Y.L., Gusev V.D., Miroshnichenko L.A. LZcomposer: Decomposition of Genomic Sequences by Repeat Fragments. Biofizika, 2003, vol. 48, suppl. 1, pp. S7-S16.</mixed-citation>
     <mixed-citation xml:lang="en">Orlov Y.L., Gusev V.D., Miroshnichenko L.A. LZcomposer: Decomposition of Genomic Sequences by Repeat Fragments. Biofizika, 2003, vol. 48, suppl. 1, pp. S7-S16.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B25">
    <label>25.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Wu C., Chen J., Liu Y., Hu X. Improved Prediction of Regulatory Element Using Hybrid Abelian Complexity Features with DNA Sequences. International Journal of Molecular Sciences, 2019, vol. 20, no. 7, p. 1704, doi: 10.3390/ijms20071704.</mixed-citation>
     <mixed-citation xml:lang="en">Wu C., Chen J., Liu Y., Hu X. Improved Prediction of Regulatory Element Using Hybrid Abelian Complexity Features with DNA Sequences. International Journal of Molecular Sciences, 2019, vol. 20, no. 7, p. 1704, doi: 10.3390/ijms20071704.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B26">
    <label>26.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Орлов Ю.Л., Митина А.В., Суслов В.В., Дергилев А.И. Компьютерные оценки информационной сложности геномов прокариот. Тезисы докладов 4-й Всероссийской конференции по астробиологии «Геологические, биологические и биогеохимические процессы в решении астробиологических задач» 27 февраля - 2 марта 2023 г., г.Пущино. Институт физико-химических и биологических проблем почвоведения РАН, с. 20-22.</mixed-citation>
     <mixed-citation xml:lang="en">Orlov Y.L., Mitina A.V., Suslov V.V., Dergilev A.I. Computer estimates of the information complexity of prokaryotic genomes. Abstracts of the 4th All-Russian Conference on Astrobiology &quot;Geological, biological and biogeochemical processes in solving astrobiological problems&quot; February 27 - March 2, 2023, Pushchino. Institute of Physicochemical and Biological Problems of Soil Science RAS, pp. 20-22 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B27">
    <label>27.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Суслов В.В., Афонников Д.А., Подколодный Н.Л., Орлов Ю.Л. Особенности геномного контекста и GC состав генома прокариот в связи с эволюцией среды обитания. Палеонтологический журнал, 2013, т. 47, № 9, с. 1056-1060, doi: 10.1134/S0031030113090220.</mixed-citation>
     <mixed-citation xml:lang="en">Suslov V.V., Afonnikov D.A., Podkolodny N.L., Orlov Y.L. Genome features and GC content in prokaryotic genomes in connection with environmental evolution. Paleontological Journal, 2013, vol. 47, no. 9, pp. 1056-1060 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B28">
    <label>28.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Safronova N.S., Babenko V.N., Orlov Y.L. 117 Analysis of SNP containing sites in human genome using text complexity estimates. Journal of Biomolecular Structure and Dynamics, 2015, vol. 33, suppl. 1, pp. 73-74, doi: 10.1080/07391102.2015.1032750.</mixed-citation>
     <mixed-citation xml:lang="en">Safronova N.S., Babenko V.N., Orlov Y.L. 117 Analysis of SNP containing sites in human genome using text complexity estimates. Journal of Biomolecular Structure and Dynamics, 2015, vol. 33, suppl. 1, pp. 73-74, doi: 10.1080/07391102.2015.1032750.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B29">
    <label>29.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Дергилев А.И., Орлова Н.Г., Митина А.В., Орлов Ю.Л. Применение методов оценки сложности текста к анализу геномных кластеров сайтов связывания транскрипционных факторов. Сборник научных трудов VII Съезда Биофизиков России: в 2 томах, том 1 - Краснодар: Типография ФГБОУ ВО «КубГТУ», 2023, с. 335-336, doi: 10.26297/SbR6.2023.001.</mixed-citation>
     <mixed-citation xml:lang="en">Dergilev A.I., Orlova N.G., Mitina A.V., Orlov Y.L. Application of methods for assessing text complexity to the analysis of genomic clusters of transcription factor binding sites. Collection of scientific papers of the VII Congress of Biophysicists of Russia: in 2 volumes, vol. 1 - Krasnodar: Printing house of FGBOU VO &quot;KubGTU&quot;, 2023, pp. 335-336 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B30">
    <label>30.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Dergilev A.I., Orlova N.G., Dobrovolskaya O.B., Orlov Y.L. Statistical estimates of multiple transcription factors binding in the model plant genomes based on ChIP-seq data. J Integr Bioinform., 2021, vol. 19, no. 1, p. 20200036, doi: 10.1515/jib-2020-0036.</mixed-citation>
     <mixed-citation xml:lang="en">Dergilev A.I., Orlova N.G., Dobrovolskaya O.B., Orlov Y.L. Statistical estimates of multiple transcription factors binding in the model plant genomes based on ChIP-seq data. J Integr Bioinform., 2021, vol. 19, no. 1, p. 20200036, doi: 10.1515/jib-2020-0036.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B31">
    <label>31.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Принглаева А.М., Дергилев А.И., Панова А.Д., Орлов Ю.Л. Сложность текста и структура повторов генома на примере коронавируса. Марчуковские научные чтения 2020: Тезисы Междунар. конф., посв. 95-летию со дня рождения акад. Г. И. Марчука Новосибирск, 19-23 октября 2020 г. Ин-т вычислит. математики и матем. геофизики СО РАН, Новосибирск: ИПЦ НГУ, 2020, с. 167, doi: 10.24411/9999-017A-2020-10295.</mixed-citation>
     <mixed-citation xml:lang="en">Pringlaeva A.M., Dergilev A.I., Panova A.D., Orlov Y.L. The complexity of the text and the structure of genome repeats on the example of coronavirus. Marchuk Scientific Readings 2020: Abstracts of the Intern. conf., dedicated to the 95th anniversary of the birth of Acad. G. I. Marchuk Novosibirsk, October 19-23, 2020. Inst. Comput. mathematics and math. geophysics SB RAS, Novosibirsk: CPI NSU, 2020, p. 167 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B32">
    <label>32.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Галиева А.Г., Лузин А.Н., Орлова Н.Г., Куликова Д.К., Дергилев А.И., Орлов Ю.Л. Биоинформационные подходы для анализа точек мутации генома коронавируса. В сборнике: Молекулярная диагностика и биобезопасность-2021. COVID-19: эпидемиология, диагностика, профилактика: сборник тезисов Онлайн-конгресса с международным участием (28-29 апреля 2021 г., Москва). М.: ФБУН ЦНИИ Эпидемиологии Роспотребнадзора, 2021, 144 с.</mixed-citation>
     <mixed-citation xml:lang="en">Galieva A.G., Luzin A.N., Orlova N.G., Kulikova D.K., Dergilev A.I., Orlov Y.L. Bioinformatics approaches to analyze the mutation points of the coronavirus genome. In the collection: Molecular Diagnostics and Biosafety-2021. COVID-19: epidemiology, diagnosis, prevention: collection of abstracts of the Online Congress with international participation (April 28-29, 2021, Moscow). M.: Central Research Institute of Epidemiology of Rospotrebnadzor, 2021, 144 p. (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B33">
    <label>33.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Antao R., Mota A., Machado J.A.T. Kolmogorov complexity as a data similarity metric: application in mitochondrial DNA. Nonlinear Dyn., 2018, vol. 93, no. 3, pp. 1059-1071.</mixed-citation>
     <mixed-citation xml:lang="en">Antao R., Mota A., Machado J.A.T. Kolmogorov complexity as a data similarity metric: application in mitochondrial DNA. Nonlinear Dyn., 2018, vol. 93, no. 3, pp. 1059-1071.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B34">
    <label>34.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Dheemanth H.N. LZW Data Compression. American Journal of Engineering Research (AJER), 2014, vol. 3, no. 2, pp. 22-26.</mixed-citation>
     <mixed-citation xml:lang="en">Dheemanth H.N. LZW Data Compression. American Journal of Engineering Research (AJER), 2014, vol. 3, no. 2, pp. 22-26.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B35">
    <label>35.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Putta P., Orlov Y.L., Podkolodnyy N.L., Mitra C.K. Relatively conserved common short sequences in transcription factor binding sites and miRNA. Вавиловский журнал генетики и селекции, 2011, т. 15, № 4, с. 750-756.</mixed-citation>
     <mixed-citation xml:lang="en">Putta P., Orlov Y.L., Podkolodnyy N.L., Mitra C.K. Relatively conserved common short sequences in transcription factor binding sites and miRNA. Vavilov Journal of Genetics and Breeding, 2011, vol. 15, no. 4, pp. 750-756 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B36">
    <label>36.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Orlov Y.L., te Boekhorst R., Abnizova I.I. Statistical measures of the structure of genomic sequences: entropy, complexity, and position information. J Bioinform Comput Biol., 2006, vol. 4, pp. 523-536.</mixed-citation>
     <mixed-citation xml:lang="en">Orlov Y.L., te Boekhorst R., Abnizova I.I. Statistical measures of the structure of genomic sequences: entropy, complexity, and position information. J Bioinform Comput Biol., 2006, vol. 4, pp. 523-536.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B37">
    <label>37.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Popov O., Segal D.M., Trifonov E.N. Linguistic complexity of protein sequences as compared to texts of human languages. Biosystems, 1996, vol. 38, no. 1, pp. 65-74, doi: 10.1016/0303-2647(95)01568-x.</mixed-citation>
     <mixed-citation xml:lang="en">Popov O., Segal D.M., Trifonov E.N. Linguistic complexity of protein sequences as compared to texts of human languages. Biosystems, 1996, vol. 38, no. 1, pp. 65-74, doi: 10.1016/0303-2647(95)01568-x.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B38">
    <label>38.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Troyanskaya O.G., Arbell O., Koren Y., Landau G.M., Bolshoy A. Sequence complexity profiles of prokaryotic genomic sequences: a fast algorithm for calculating linguistic complexity. Bioinformatics, 2002, vol. 18, no. 5, pp. 679-688.</mixed-citation>
     <mixed-citation xml:lang="en">Troyanskaya O.G., Arbell O., Koren Y., Landau G.M., Bolshoy A. Sequence complexity profiles of prokaryotic genomic sequences: a fast algorithm for calculating linguistic complexity. Bioinformatics, 2002, vol. 18, no. 5, pp. 679-688.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B39">
    <label>39.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Lu R., Zhao X., Li J. et al. Genomic characterisation and epidemiology of 2019 novel coronavirus: implications for virus origins and receptor binding. Lancet, 2020, vol. 395, no. 10224, pp. 565-574, doi: 10.1016/S0140-6736(20)30251-8.</mixed-citation>
     <mixed-citation xml:lang="en">Lu R., Zhao X., Li J. et al. Genomic characterisation and epidemiology of 2019 novel coronavirus: implications for virus origins and receptor binding. Lancet, 2020, vol. 395, no. 10224, pp. 565-574, doi: 10.1016/S0140-6736(20)30251-8.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B40">
    <label>40.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Hu B., Guo H., Zhou P. et al. Characteristics of SARS-CoV-2 and COVID-19. Nat Rev Microbiol., 2021, vol. 19, pp. 141-154, doi: 10.1038/s41579-020-00459-7.</mixed-citation>
     <mixed-citation xml:lang="en">Hu B., Guo H., Zhou P. et al. Characteristics of SARS-CoV-2 and COVID-19. Nat Rev Microbiol., 2021, vol. 19, pp. 141-154, doi: 10.1038/s41579-020-00459-7.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B41">
    <label>41.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Рубальская Т.С., Ерохов Д.В., Жердева П.Е., Милихина А.В., Гаджиева А.А., Тихонова Н.Т. Генотипирование вируса эпидемического паротита (Paramyxoviridae: Orthorubulavirus: Mumps Orthorubulavirus) как элемент лабораторного подтверждения инфекции. Вопросы вирусологии, 2023, т. 68, № 1, с. 59-65.</mixed-citation>
     <mixed-citation xml:lang="en">Rubalskaya T.S., Erokhov D.V., Zherdeva P.E., Milikhina A.V., Gadzhieva A.A., Tikhonova N.T. Genotyping of mumps virus (Paramyxoviridae: Orthorubulavirus: Mumps Orthorubulavirus) as an element of laboratory confirmation of infection. Questions of virology, 2023, vol. 68, no. 1, pp. 59-65 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B42">
    <label>42.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Su S.B., Chang H.L., Chen A.K. Current Status of Mumps Virus Infection: Epidemiology, Pathogenesis, and Vaccine. Int J Environ Res Public Health, 2020, vol. 17, no. 5, p. 1686, doi: 10.3390/ijerph17051686.</mixed-citation>
     <mixed-citation xml:lang="en">Su S.B., Chang H.L., Chen A.K. Current Status of Mumps Virus Infection: Epidemiology, Pathogenesis, and Vaccine. Int J Environ Res Public Health, 2020, vol. 17, no. 5, p. 1686, doi: 10.3390/ijerph17051686.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B43">
    <label>43.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Юминова Н.В., Контарова Е.О., Балаев Н.В., Артюшенко С.В., Контаров Н.А., Россошанская Н.В., Сидоренко Е.С., Гафаров Р.Р., Зверев В.В. Вакцинопрофилактика кори, эпидемического паротита и краснухи: задачи, проблемы и реалии. Эпидемиология и Вакцинопрофилактика, 2011, т. 4, № 59, с. 40-44.</mixed-citation>
     <mixed-citation xml:lang="en">Yuminova N.V., Kontarova E.O., Balaev N.V., Artyushenko S.V., Kontarov N.A., Rossoshanskaya N.V., Sidorenko E.S., Gafarov R.R., Zverev V.V. Measles, mumps and rubella vaccination: tasks, problems and realities. Epidemiology and Vaccinal Prevention, 2011, vol. 4, no. 59, pp. 40-44 (In Russ.).</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B44">
    <label>44.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Chao H., Zhang S., Hu Y., Ni Q., Xin S., Zhao L., Ivanisenko V.A., Orlov Y.L., Chen M. Integrating omics databases for enhanced crop breeding. J Integr Bioinform., 2023, doi: 10.1515/jib-2023-0012.</mixed-citation>
     <mixed-citation xml:lang="en">Chao H., Zhang S., Hu Y., Ni Q., Xin S., Zhao L., Ivanisenko V.A., Orlov Y.L., Chen M. Integrating omics databases for enhanced crop breeding. J Integr Bioinform., 2023, doi: 10.1515/jib-2023-0012.</mixed-citation>
    </citation-alternatives>
   </ref>
   <ref id="B45">
    <label>45.</label>
    <citation-alternatives>
     <mixed-citation xml:lang="ru">Orlov Y.L., Bragin A.O., Babenko R.O., Dresvyannikova A.E., Kovalev S.S., Shaderkin I.A., Orlova N.G., Naumenko F.M. Integrated Computer Analysis of Genomic Sequencing Data Based on ICGenomics Tool. In: Advances in Intelligent Systems, Computer Science and Digital Economics. CSDEIS 2019, AISC 1127, International Journal of Intelligent Systems and Applications (IJISA), 2020, pp. 154-164, doi: 10.1007/978-3-030-39216-1_15.</mixed-citation>
     <mixed-citation xml:lang="en">Orlov Y.L., Bragin A.O., Babenko R.O., Dresvyannikova A.E., Kovalev S.S., Shaderkin I.A., Orlova N.G., Naumenko F.M. Integrated Computer Analysis of Genomic Sequencing Data Based on ICGenomics Tool. In: Advances in Intelligent Systems, Computer Science and Digital Economics. CSDEIS 2019, AISC 1127, International Journal of Intelligent Systems and Applications (IJISA), 2020, pp. 154-164, doi: 10.1007/978-3-030-39216-1_15.</mixed-citation>
    </citation-alternatives>
   </ref>
  </ref-list>
 </back>
</article>
