Have you looked at the response content
Here is an example found online that you can try.
print(response)
to see if it really is a binary format that you get back? Here is an example found online that you can try.
"""Convert a short text to speech with the Microsoft Text to Speech REST API.

Fill in the placeholders (subscription key, region, resource name) before
running. The script posts an SSML document to the service and writes the
returned audio bytes to ``output.wav`` in the current directory.
"""
import json
from xml.sax.saxutils import escape

import requests

# Replace with your Text to Speech subscription key
subscription_key = "YOUR_SUBSCRIPTION_KEY"

# Replace with the voice you want to use
voice = "en-US-Jessa24kRUS"

# Replace with the text you want to convert to speech
text = "Hello, this is an example of the Text to Speech API."

# Specify the language and format of the audio file
headers = {
    "Ocp-Apim-Subscription-Key": subscription_key,
    "Content-Type": "application/ssml+xml",
    "X-Microsoft-OutputFormat": "riff-24khz-16bit-mono-pcm",
    "User-Agent": "YOUR_RESOURCE_NAME",
}

# Create the SSML request. The text is XML-escaped so that characters such
# as "&" or "<" in the input cannot produce a malformed document.
body = (
    "<speak version='1.0' xml:lang='en-us'>"
    "<voice xml:lang='en-us' xml:gender='Female' name='" + voice + "'>"
    + escape(text)
    + "</voice></speak>"
)

# Make the request to the Text to Speech API.
# The body is encoded explicitly as UTF-8: a plain ``str`` body would be
# encoded as ISO-8859-1 by the HTTP layer and fail on other characters.
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.post(
    "https://YOUR_REGION.tts.speech.microsoft.com/cognitiveservices/v1",
    headers=headers,
    data=body.encode("utf-8"),
    timeout=30,
)

# Fail loudly on an HTTP error (bad key, wrong region, rejected SSML, ...)
# instead of silently saving the error payload as if it were audio.
response.raise_for_status()

# Save the generated audio file
with open("output.wav", "wb") as audio_file:
    audio_file.write(response.content)

print("The file has been saved to output.wav.")