Commit 26e99955 authored by Bokoch Sergey's avatar Bokoch Sergey
Browse files

Add CNN for speech recognition

parent b4d14f24
%% Cell type:markdown id: tags:
# Генерация открытого датасета для YandexAudioCaptcha
%% Cell type:code id: tags:
``` python
import io
import os

from gtts import gTTS
from pydub import AudioSegment

# Output directory for the clips synthesized with gTTS.
DIR = 'train'
# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(DIR, exist_ok=True)

for i in range(111, 120):
    # The digits of i are separated by spaces before synthesis, presumably so
    # that each digit is pronounced on its own.
    tts = gTTS(' '.join(str(i)))
    file_path = DIR + '/train_{:02d}.wav'.format(i)
    # gTTS emits MP3 data, so writing it straight to a ".wav" path yields a
    # mislabeled file. Decode the MP3 in memory and export real WAV instead
    # (pydub needs ffmpeg/libav available to decode MP3).
    mp3_buffer = io.BytesIO()
    tts.write_to_fp(mp3_buffer)
    mp3_buffer.seek(0)
    AudioSegment.from_file(mp3_buffer, format='mp3').export(file_path, format='wav')
```
%% Cell type:markdown id: tags:
Генерация с [сайта](https://translate.yandex.ru)
%% Cell type:code id: tags:
``` python
import requests
import sys
import re
import os
from os import path
import pandas as pd
```
%% Cell type:code id: tags:
``` python
# Directory that receives the clips downloaded from the Yandex speech endpoint.
DIR_PATH = 'train_yandex'
# Create DIR_PATH itself. The previous check tested and created DIR (a
# different directory), so 'train_yandex' was never created and writing the
# downloaded files into it failed.
os.makedirs(DIR_PATH, exist_ok=True)
# Translator page address; not referenced by the visible cells.
URL = 'https://translate.yandex.ru/?lang=ru-en'
# Number of clips requested by the download loop.
COUNT_EXAMPLES = 100
```
%% Cell type:code id: tags:
``` python
# Request template for the Yandex speech endpoint; the '{your text}'
# placeholder is replaced with the text to synthesize for each request.
URL_YANDEX_SPEAKER = 'https://tts.voicetech.yandex.net/tts?text={your text}&lang=en_ENG&format=wav&quality=hi&platform=web&application=translate&chunked=0&mock-ranges=7'
# NOTE(review): the reference URL below uses lang=en_GB while the template
# above uses lang=en_ENG -- confirm which language code the service expects.
# https://tts.voicetech.yandex.net/tts?text={1}&lang=en_GB&format=wav&quality=hi&platform=web&application=translate&chunked=0&mock-ranges=7
file_names = []
answers = []
for i in range(COUNT_EXAMPLES):
    # Cycle through the digits 0-9 so each digit gets the same number of clips.
    d = i % 10
    myfile = requests.get(
        URL_YANDEX_SPEAKER.replace('{your text}', str(d)),
        allow_redirects=True,
        timeout=30,  # do not hang forever on a stalled connection
    )
    # Fail loudly on an HTTP error instead of saving the error body as a clip.
    myfile.raise_for_status()
    dst = DIR_PATH + '/yandex_{:02d}.wav'.format(i)
    # The context manager guarantees the file is flushed and closed.
    with open(dst, 'wb') as wav_file:
        wav_file.write(myfile.content)
    answers.append(d)
    file_names.append(dst)
# Index of the dataset: one row per clip with its path and the spoken digit.
df = pd.DataFrame({'file_name': file_names, 'word': answers})
df.to_csv('yandex.csv')
```
%% Cell type:code id: tags:
``` python
```
......
This diff is collapsed.
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment