<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">bsuir</journal-id><journal-title-group><journal-title xml:lang="ru">Доклады БГУИР</journal-title><trans-title-group xml:lang="en"><trans-title>Doklady BGUIR</trans-title></trans-title-group></journal-title-group><issn pub-type="ppub">1729-7648</issn><issn pub-type="epub">2708-0382</issn><publisher><publisher-name>БГУИР</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.35596/1729-7648-2023-21-2-114-120</article-id><article-id custom-type="elpub" pub-id-type="custom">bsuir-3607</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>ЭЛЕКТРОНИКА, РАДИОФИЗИКА, РАДИОТЕХНИКА, ИНФОРМАТИКА</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>ELECTRONICS, RADIOPHYSICS, RADIOENGINEERING, INFORMATICS</subject></subj-group></article-categories><title-group><article-title>Распознавание голоса с использованием свёрточной нейронной сети</article-title><trans-title-group xml:lang="en"><trans-title>Voice Detection Using Convolutional Neural Network</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Вишняков</surname><given-names>В. А.</given-names></name><name name-style="western" xml:lang="en"><surname>Vishniakou</surname><given-names>U. A.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Минск</p></bio><bio xml:lang="en"><p>Vishniakou Uladzimir Anatolievich, Dr. of Sci. (Eng.), Professor at the Department of Infocommunication Technologies</p><p>220013, Minsk, P. 
Brovki St., 6</p><p>Tel.: +375 44 486-71-82</p></bio><email xlink:type="simple">vish@bsuir.by</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Шайя</surname><given-names>Б. Х.</given-names></name><name name-style="western" xml:lang="en"><surname>Shaya</surname><given-names>B. H.</given-names></name></name-alternatives><bio xml:lang="ru"><p>Минск</p></bio><bio xml:lang="en"><p>Shaya Bahaa H., Postgraduate at the Department of Infocommunication Technologies</p><p>Minsk</p></bio><xref ref-type="aff" rid="aff-1"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru"><institution>Белорусский государственный университет информатики и радиоэлектроники</institution></aff><aff xml:lang="en"><institution>Belarusian State University of Informatics and Radioelectronics</institution></aff></aff-alternatives><pub-date pub-type="collection"><year>2023</year></pub-date><pub-date pub-type="epub"><day>24</day><month>04</month><year>2023</year></pub-date><volume>21</volume><issue>2</issue><fpage>114</fpage><lpage>120</lpage><permissions><copyright-statement>Copyright © Вишняков В.А., Шайя Б.Х., 2023</copyright-statement><copyright-year>2023</copyright-year><copyright-holder xml:lang="ru">Вишняков В.А., Шайя Б.Х.</copyright-holder><copyright-holder xml:lang="en">Vishniakou U.A., Shaya B.H.</copyright-holder><license xml:lang="ru" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>Данная работа распространяется под лицензией Creative Commons Attribution 4.0.</license-p></license><license xml:lang="en" license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri 
xlink:href="https://doklady.bsuir.by/jour/article/view/3607">https://doklady.bsuir.by/jour/article/view/3607</self-uri><abstract><p>Представлены подход, методология, программная система, основанные на свёрточной нейронной сети, для распознавания голоса (кашля) в условиях зашумленности с использованием технологий машинного обучения. Разработана и оценена система распознавания кашля на основе машинного обучения, использования свёрточной нейронной сети и библиотек языка Python. Свёрточная нейронная сеть протестирована с помощью различных наборов данных и библиотек. В отличие от существующих современных работ в этой области предложенная система оценивалась с применением реального набора звуковых данных окружающей среды, а не только отфильтрованных или разделенных звуковых параметров голоса. Окончательная скомпилированная модель показала относительно высокую среднюю точность – 85,37 %. Предлагаемая система способна распознавать звук голоса в многолюдном общественном месте, и нет необходимости в фазе разделения звука для предварительной обработки, как в других системах. Несколько добровольцев записали звуки своего голоса с помощью смартфонов. Затем они протестировали свои голоса в общественных местах на предмет шума в дополнение к некоторым аудиофайлам, которые были загружены онлайн. Результаты показали среднюю точность распознавания – 85,37 %, минимальную – 78,8 % и рекордную – 91,9 %. </p></abstract><trans-abstract xml:lang="en"><p>The article presents an approach, a methodology and a software system based on machine learning technologies and a convolutional neural network for voice (cough) recognition. The tasks of the article are developing and evaluating a voice detection system using deep learning, a convolutional neural network and the Python language for patients with cough. The convolutional neural network has been developed, trained and tested using various datasets and Python libraries. 
Unlike the existing modern works related to this area, the proposed system was evaluated using a real set of environmental sound data, and not only on filtered or separated voice audio tracks. The final compiled model showed a relatively high average accuracy of 85.37 %. Thus, the system is able to detect the sound of a voice in a crowded public place, and there is no need for a sound separation phase for pre-processing, as other modern systems require. Several volunteers recorded their voice sounds using the microphones of their smartphones, and it was ensured that they tested their voices in noisy public places, in addition to some audio files that were uploaded online. The results showed an average recognition accuracy of 85.37 %, a minimum accuracy of 78.8 % and a record accuracy of 91.9 %. </p></trans-abstract><kwd-group xml:lang="ru"><kwd>распознавание голоса</kwd><kwd>свёрточная нейронная сеть</kwd><kwd>набор данных на основе машинного обучения</kwd><kwd>аудиофайлы</kwd></kwd-group><kwd-group xml:lang="en"><kwd>voice detection</kwd><kwd>convolution neural network</kwd><kwd>machine learning-based dataset</kwd><kwd>audio files</kwd></kwd-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Shakel N. V., Ablameyko M. S. (2020) Medical Worker and Patient: Interaction in the Context of E-Health. Minsk, Eco-Perspective Publ. (in Russian).</mixed-citation><mixed-citation xml:lang="en">Shakel N. V., Ablameyko M. S. (2020) Medical Worker and Patient: Interaction in the Context of E-Health. Minsk, Eco-Perspective Publ. (in Russian).</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Alqudaihi K. S., Aslam N., Khan I. U. [et al.] (2021) Cough Sound Detection and Diagnosis Using Artificial Intelligence Techniques: Challenges and Opportunities. IEEE Public Health Emergency Collection. 
9, 102327–102344.</mixed-citation><mixed-citation xml:lang="en">Alqudaihi K. S., Aslam N., Khan I. U. [et al.] (2021) Cough Sound Detection and Diagnosis Using Artificial Intelligence Techniques: Challenges and Opportunities. IEEE Public Health Emergency Collection. 9, 102327–102344.</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Amoh J., Odame K. (2016) Deep Neural Networks for Identifying Cough Sounds. IEEE Transactions on Biomedical Circuits and Systems. 10 (5), 1003–1011.</mixed-citation><mixed-citation xml:lang="en">Amoh J., Odame K. (2016) Deep Neural Networks for Identifying Cough Sounds. IEEE Transactions on Biomedical Circuits and Systems. 10 (5), 1003–1011.</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Gong Y., Lai C.-I. J., Chung Y.-A., Glass J. (2021) SSAST: Self-Supervised Audio Spectrogram Transformer. Applied Science. 570–575.</mixed-citation><mixed-citation xml:lang="en">Gong Y., Lai C.-I. J., Chung Y.-A., Glass J. (2021) SSAST: Self-Supervised Audio Spectrogram Transformer. Applied Science. 570–575.</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Nanni L., Maguolo G., Brahnam S., Paci M. (2021) An Ensemble of Convolutional Neural Networks for Audio Classification. Applied Science. 57–76.</mixed-citation><mixed-citation xml:lang="en">Nanni L., Maguolo G., Brahnam S., Paci M. (2021) An Ensemble of Convolutional Neural Networks for Audio Classification. Applied Science. 57–76.</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Chowdhury A., Ross A. (2019) Fusing MFCC and LPC Features using 1D Triplet CNN for Speaker Recognition in Severely Degraded Audio Signals. IEEE Transactions on Information Forensics and Security. 
15, 1616–1629.</mixed-citation><mixed-citation xml:lang="en">Chowdhury A., Ross A. (2019) Fusing MFCC and LPC Features using 1D Triplet CNN for Speaker Recognition in Severely Degraded Audio Signals. IEEE Transactions on Information Forensics and Security. 15, 1616–1629.</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Visniakou U. A., Shaya B. H. (2022) Implementation of the Internet of Things Network for Monitoring Audio Information on a Microprocessor and Controller. System Analysis and Application Informatics. (1), 39–44.</mixed-citation><mixed-citation xml:lang="en">Visniakou U. A., Shaya B. H. (2022) Implementation of the Internet of Things Network for Monitoring Audio Information on a Microprocessor and Controller. System Analysis and Application Informatics. (1), 39–44.</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
