Let's start by adding the pod.
https://onedaycodeing.tistory.com/88
If you're new to CocoaPods, please read the post above first and then come back.
Add this single line to your Podfile and save it:
pod 'googleapis', :path => '.'
Don't run pod install yet.
First, copy the files inside the downloaded archive straight into your project folder.
Then run pod install!
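For reference, a complete Podfile might look roughly like the sketch below. The target name and platform version are placeholders, so adjust them to your project; the point is that the googleapis.podspec and proto files you just copied sit next to the Podfile, which is why :path points at '.'.

platform :ios, '13.0'

target 'MyApp' do   # replace 'MyApp' with your app target's name
  use_frameworks!

  # the googleapis.podspec lives in the project root alongside this Podfile
  pod 'googleapis', :path => '.'
end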
Now let's move on to the code.
Create a SpeechRecognitionService class and write it as follows.
import Foundation
import googleapis

typealias SpeechRecognitionCompletionHandler = (StreamingRecognizeResponse?, NSError?) -> (Void)

class SpeechRecognitionService {
    var sampleRate: Int = 16000
    private var streaming = false

    private var client: Speech!
    private var writer: GRXBufferedPipe!
    private var call: GRPCProtoCall!

    static let sharedInstance = SpeechRecognitionService()

    func streamAudioData(_ audioData: NSData, completion: @escaping SpeechRecognitionCompletionHandler) {
        if (!streaming) {
            // if we aren't already streaming, set up a gRPC connection
            client = Speech(host: "speech.googleapis.com")
            print("client: \(String(describing: client))")
            writer = GRXBufferedPipe()
            call = client.rpcToStreamingRecognize(withRequestsWriter: writer,
                                                  eventHandler: { (done, response, error) in
                                                      completion(response, error as? NSError)
                                                  })
            // API key: replace with the key you issued in the Google Cloud console
            call.requestHeaders.setObject(NSString(string: "YOUR_API_KEY"),
                                          forKey: NSString(string: "X-Goog-Api-Key"))
            // restrict the key to this app's bundle identifier
            call.requestHeaders.setObject(NSString(string: Bundle.main.bundleIdentifier!),
                                          forKey: NSString(string: "X-Ios-Bundle-Identifier"))
            print("HEADERS: \(call.requestHeaders)")

            call.start()
            streaming = true

            // send an initial request message to configure the service
            let recognitionConfig = RecognitionConfig()
            recognitionConfig.encoding = .linear16
            recognitionConfig.sampleRateHertz = Int32(sampleRate)
            recognitionConfig.languageCode = "ko-KR" // "en-US"
            recognitionConfig.maxAlternatives = 30
            recognitionConfig.enableWordTimeOffsets = true

            let streamingRecognitionConfig = StreamingRecognitionConfig()
            streamingRecognitionConfig.config = recognitionConfig
            streamingRecognitionConfig.singleUtterance = false
            streamingRecognitionConfig.interimResults = true

            let streamingRecognizeRequest = StreamingRecognizeRequest()
            streamingRecognizeRequest.streamingConfig = streamingRecognitionConfig

            writer.writeValue(streamingRecognizeRequest)
        }

        // send a request message containing the raw audio data
        let streamingRecognizeRequest = StreamingRecognizeRequest()
        streamingRecognizeRequest.audioContent = audioData as Data
        writer.writeValue(streamingRecognizeRequest)
    }

    func stopStreaming() {
        if (!streaming) {
            return
        }
        writer.finishWithError(nil)
        streaming = false
    }

    func isStreaming() -> Bool {
        return streaming
    }
}
Next, create an AudioController class.
import Foundation
import AVFoundation

protocol AudioControllerDelegate {
    func processSampleData(_ data: Data) -> Void
}

class AudioController {
    var remoteIOUnit: AudioComponentInstance? // optional to allow it to be an inout argument
    var delegate: AudioControllerDelegate!

    static var sharedInstance = AudioController()

    deinit {
        if let remoteIOUnit = remoteIOUnit {
            AudioComponentInstanceDispose(remoteIOUnit)
        }
    }

    func prepare(specifiedSampleRate: Int) -> OSStatus {
        var status = noErr

        let session = AVAudioSession.sharedInstance()
        do {
            try session.setCategory(AVAudioSession.Category.record)
            try session.setPreferredIOBufferDuration(10)
        } catch {
            return -1
        }

        var sampleRate = session.sampleRate
        print("hardware sample rate = \(sampleRate), using specified rate = \(specifiedSampleRate)")
        sampleRate = Double(specifiedSampleRate)

        // Describe the RemoteIO unit
        var audioComponentDescription = AudioComponentDescription()
        audioComponentDescription.componentType = kAudioUnitType_Output
        audioComponentDescription.componentSubType = kAudioUnitSubType_RemoteIO
        audioComponentDescription.componentManufacturer = kAudioUnitManufacturer_Apple
        audioComponentDescription.componentFlags = 0
        audioComponentDescription.componentFlagsMask = 0

        // Get the RemoteIO unit
        let remoteIOComponent = AudioComponentFindNext(nil, &audioComponentDescription)
        status = AudioComponentInstanceNew(remoteIOComponent!, &remoteIOUnit)
        if (status != noErr) {
            return status
        }

        let bus1: AudioUnitElement = 1
        var oneFlag: UInt32 = 1

        // Configure the RemoteIO unit for input
        status = AudioUnitSetProperty(remoteIOUnit!,
                                      kAudioOutputUnitProperty_EnableIO,
                                      kAudioUnitScope_Input,
                                      bus1,
                                      &oneFlag,
                                      UInt32(MemoryLayout<UInt32>.size))
        if (status != noErr) {
            return status
        }

        // Set format for mic input (bus 1) on RemoteIO's output scope
        var asbd = AudioStreamBasicDescription()
        asbd.mSampleRate = sampleRate
        asbd.mFormatID = kAudioFormatLinearPCM
        asbd.mFormatFlags = kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked
        asbd.mBytesPerPacket = 2
        asbd.mFramesPerPacket = 1
        asbd.mBytesPerFrame = 2
        asbd.mChannelsPerFrame = 1
        asbd.mBitsPerChannel = 16
        status = AudioUnitSetProperty(remoteIOUnit!,
                                      kAudioUnitProperty_StreamFormat,
                                      kAudioUnitScope_Output,
                                      bus1,
                                      &asbd,
                                      UInt32(MemoryLayout<AudioStreamBasicDescription>.size))
        if (status != noErr) {
            return status
        }

        // Set the recording callback
        var callbackStruct = AURenderCallbackStruct()
        callbackStruct.inputProc = recordingCallback
        callbackStruct.inputProcRefCon = nil
        status = AudioUnitSetProperty(remoteIOUnit!,
                                      kAudioOutputUnitProperty_SetInputCallback,
                                      kAudioUnitScope_Global,
                                      bus1,
                                      &callbackStruct,
                                      UInt32(MemoryLayout<AURenderCallbackStruct>.size))
        if (status != noErr) {
            return status
        }

        // Initialize the RemoteIO unit
        return AudioUnitInitialize(remoteIOUnit!)
    }

    func start() -> OSStatus {
        return AudioOutputUnitStart(remoteIOUnit!)
    }

    func stop() -> OSStatus {
        return AudioOutputUnitStop(remoteIOUnit!)
    }
}

// Render callback: pulls the recorded samples out of the RemoteIO unit
// and hands them to the delegate on the main queue.
func recordingCallback(
    inRefCon: UnsafeMutableRawPointer,
    ioActionFlags: UnsafeMutablePointer<AudioUnitRenderActionFlags>,
    inTimeStamp: UnsafePointer<AudioTimeStamp>,
    inBusNumber: UInt32,
    inNumberFrames: UInt32,
    ioData: UnsafeMutablePointer<AudioBufferList>?) -> OSStatus {

    var status = noErr

    let channelCount: UInt32 = 1

    var bufferList = AudioBufferList()
    bufferList.mNumberBuffers = channelCount
    let buffers = UnsafeMutableBufferPointer<AudioBuffer>(start: &bufferList.mBuffers,
                                                          count: Int(bufferList.mNumberBuffers))
    buffers[0].mNumberChannels = 1
    buffers[0].mDataByteSize = inNumberFrames * 2 // 16-bit mono samples
    buffers[0].mData = nil

    // get the recorded samples
    status = AudioUnitRender(AudioController.sharedInstance.remoteIOUnit!,
                             ioActionFlags,
                             inTimeStamp,
                             inBusNumber,
                             inNumberFrames,
                             &bufferList)
    if (status != noErr) {
        return status
    }

    let data = Data(bytes: buffers[0].mData!, count: Int(buffers[0].mDataByteSize))
    DispatchQueue.main.async {
        AudioController.sharedInstance.delegate.processSampleData(data)
    }

    return noErr
}
Now go back to your main view controller.
Create a label to display the transcribed text and a button to start and stop recognition.
@IBOutlet weak var resultText: UILabel!
@IBOutlet weak var startBtn: UIButton!
var check = false
var audioData: NSMutableData!
This is how I set mine up.
Lay out the UI however you like; it's just for testing.
override func viewDidLoad() {
    super.viewDidLoad()
    AudioController.sharedInstance.delegate = self
}
Set the delegate in viewDidLoad as shown above.
I put the delegate conformance in a separate extension.
extension ViewController : AudioControllerDelegate {
}
You'll then probably get an error telling you to declare processSampleData.
Once you declare it, you'll have:
func processSampleData(_ data: Data) -> Void {
    // code goes here
}
Now let's fill in the code here.
func processSampleData(_ data: Data) -> Void {
    audioData.append(data)

    // We recommend sending samples in 100ms chunks
    let chunkSize: Int /* bytes/chunk */ = Int(0.1 /* seconds/chunk */
        * Double(16000) /* samples/second */
        * 2 /* bytes/sample */)

    if (audioData.length > chunkSize) {
        SpeechRecognitionService.sharedInstance.streamAudioData(audioData,
                                                                completion: { [weak self] (response, error) in
            guard let self = self else {
                return
            }
            if let error = error {
                print("error: \(error.localizedDescription)")
                self.resultText.text = error.localizedDescription
            } else if let response = response {
                var finished = false
                print("response: \(response)\n, description: \(response.description)")
                for result in response.resultsArray! {
                    if let result = result as? StreamingRecognitionResult {
                        if result.isFinal {
                            print("result: \(result.alternativesArray[0])")
                            let trans = result.alternativesArray[0] as? SpeechRecognitionAlternative
                            print("trans: \(String(describing: trans?.transcript))")
                            finished = true
                            self.resultText.text = trans?.transcript
                        }
                    }
                }
                if finished {
                    // speech has finished: stop recording and streaming, reset the button
                    print("finished speaking")
                    self.check = false
                    _ = AudioController.sharedInstance.stop()
                    SpeechRecognitionService.sharedInstance.stopStreaming()
                    self.startBtn.setTitle("Start Speaking", for: .normal)
                }
            }
        })
        self.audioData = NSMutableData()
    }
}
I wrote this by referring to the example code.
Any errors you see will most likely be variable names that don't match your own declarations, so just fix those.
When speech ends, the transcribed text is shown automatically and you can start again.
Now let's write the code for the start button's listener.
@objc func startBtnClick(sender: UITapGestureRecognizer) {
    if (check) {
        // stop
        self.check = false
        _ = AudioController.sharedInstance.stop()
        SpeechRecognitionService.sharedInstance.stopStreaming()
        startBtn.setTitle("Start Speaking", for: .normal)
    } else {
        // start
        startBtn.setTitle("Stop Speaking", for: .normal)
        let audioSession = AVAudioSession.sharedInstance()
        do {
            try audioSession.setCategory(AVAudioSession.Category.record)
        } catch {
        }
        audioData = NSMutableData()
        _ = AudioController.sharedInstance.prepare(specifiedSampleRate: 16000)
        SpeechRecognitionService.sharedInstance.sampleRate = 16000
        _ = AudioController.sharedInstance.start()
        self.check = true
    }
}
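For reference, here is one way the button could be wired to this handler. This is just a sketch; I'm assuming a tap gesture recognizer on startBtn to match the sender type above, but an @IBAction or addTarget call would work just as well:

// In viewDidLoad, after setting the delegate (sketch; adjust names to your project)
let tap = UITapGestureRecognizer(target: self, action: #selector(startBtnClick(sender:)))
startBtn.addGestureRecognizer(tap)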
And with that, everything is done.
On Android there was a built-in SDK, which made this easy,
but on iOS there's a lot to set up, from credentials all the way to API keys!
And it's not even free; it's a paid service.