MJobe committed on
Commit
58b3b85
1 Parent(s): 1f23076

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +9 -5
main.py CHANGED
@@ -168,7 +168,11 @@ async def transcribe_and_match(
168
  contents = await file.read()
169
  audio = AudioSegment.from_file(BytesIO(contents))
170
 
171
- # Step 2: Export to WAV format and load with torchaudio
 
 
 
 
172
  wav_buffer = BytesIO()
173
  audio.export(wav_buffer, format="wav")
174
  wav_buffer.seek(0)
@@ -179,14 +183,14 @@ async def transcribe_and_match(
179
  # Convert waveform to float32
180
  samples = waveform.numpy().astype(np.float32)
181
 
182
- # Step 3: Use the speech-to-text model
183
  transcription_result = nlp_speech_to_text(samples)
184
  transcription_text = transcription_result['text']
185
 
186
- # Step 4: Parse the field_data (which contains field names/IDs)
187
  fields = json.loads(field_data)
188
 
189
- # Step 5: Find the matching field for the transcription
190
  field_matches = {}
191
  for field in fields:
192
  field_label = field.get("field_label", "").lower()
@@ -196,7 +200,7 @@ async def transcribe_and_match(
196
  if field_label in transcription_text.lower():
197
  field_matches[field_id] = transcription_text
198
 
199
- # Step 6: Return transcription + matched fields
200
  return {
201
  "transcription": transcription_text,
202
  "matched_fields": field_matches
 
168
  contents = await file.read()
169
  audio = AudioSegment.from_file(BytesIO(contents))
170
 
171
+ # Step 2: Ensure audio is mono
172
+ if audio.channels > 1:
173
+ audio = audio.set_channels(1) # Convert to mono
174
+
175
+ # Step 3: Export to WAV format and load with torchaudio
176
  wav_buffer = BytesIO()
177
  audio.export(wav_buffer, format="wav")
178
  wav_buffer.seek(0)
 
183
  # Convert waveform to float32
184
  samples = waveform.numpy().astype(np.float32)
185
 
186
+ # Step 4: Use the speech-to-text model
187
  transcription_result = nlp_speech_to_text(samples)
188
  transcription_text = transcription_result['text']
189
 
190
+ # Step 5: Parse the field_data (which contains field names/IDs)
191
  fields = json.loads(field_data)
192
 
193
+ # Step 6: Find the matching field for the transcription
194
  field_matches = {}
195
  for field in fields:
196
  field_label = field.get("field_label", "").lower()
 
200
  if field_label in transcription_text.lower():
201
  field_matches[field_id] = transcription_text
202
 
203
+ # Step 7: Return transcription + matched fields
204
  return {
205
  "transcription": transcription_text,
206
  "matched_fields": field_matches