File size: 4,946 Bytes
dca3e52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86bfa3b
dca3e52
 
 
 
 
 
 
 
47351da
dca3e52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86bfa3b
 
 
 
dca3e52
 
 
 
86bfa3b
dca3e52
86bfa3b
 
dca3e52
86bfa3b
dca3e52
 
 
 
e923e2b
dca3e52
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import pyktok as pyk
pyk.specify_browser("chrome")  # Specify the browser to use for accessing TikTok
import os
import moviepy.editor as mp
import requests
import time
from pathlib import Path
from openai import OpenAI

# Module-wide OpenAI client shared by the transcription and recipe-formatting
# steps below; the API key is taken from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def download_video(url, video_id, outpath=Path('.')):
    """Download a TikTok video into *outpath*, reusing a previous download.

    pyktok chooses the video's file name itself (the name contains the video
    id), so the downloaded file is located by globbing for ``*.mp4`` files
    whose name contains *video_id* and is then moved into *outpath*.

    Args:
        url: Full TikTok video URL (without query string).
        video_id: Identifier expected to appear in the downloaded file name.
        outpath: Directory that should end up containing the video.

    Returns:
        Path of the video file inside *outpath*.

    Raises:
        FileNotFoundError: If no matching ``.mp4`` can be found after the
            download call returns.
    """
    outpath = Path(outpath)
    outpath.mkdir(parents=True, exist_ok=True)

    def _find_video(directory):
        # Sorted so the pick is deterministic if several files match.
        matches = sorted(
            p for p in directory.glob('*.mp4') if video_id in p.name
        )
        return matches[0] if matches else None

    # Look for the real video file rather than a fixed placeholder name, so a
    # cache hit always returns an actual video.
    cached = _find_video(outpath)
    if cached is not None:
        print('Video already exists. Skipping download.')
        return cached

    # The third positional argument of save_tiktok is the metadata CSV file
    # name (see the pyktok README), not the video destination, so give it a
    # real .csv path instead of something that looks like the video.
    pyk.save_tiktok(url, True, str(outpath / 'tiktok_metadata.csv'))

    new_vid = _find_video(outpath.parent)
    if new_vid is None:
        raise FileNotFoundError(
            f'Downloaded video for id {video_id!r} not found in '
            f'{outpath.parent.resolve()}'
        )
    video_path = outpath / new_vid.name
    new_vid.rename(video_path)
    # flush so the message is visible during the pause, not only after it.
    print('Downloaded video wait...', end='', flush=True)
    time.sleep(2)
    print('Done')
    return video_path

def extract_audio(path):
    """Extract the audio track of a video to ``tiktok_audio.mp3`` beside it.

    The extraction is skipped when the mp3 already exists.

    Args:
        path: Path (or string) of the video file.

    Returns:
        Path of the mp3 file, located in the same directory as *path*.

    Raises:
        ValueError: If the video has no audio track.
    """
    audio_path = Path(path).parent / "tiktok_audio.mp3"
    if audio_path.exists():
        print('Audio already exists. Skipping extraction.')
        return audio_path

    print('Extracting audio...', flush=True)
    video = mp.VideoFileClip(str(path))
    try:
        if video.audio is None:
            raise ValueError(f'No audio track found in {path}')
        video.audio.write_audiofile(str(audio_path))
    finally:
        # Always release the readers/file handles held by the clip.
        video.close()
    print('Audio extracted', flush=True)
    return audio_path

def transcribe_audio(audio_file_path):
    """Transcribe an audio file with the ``whisper-1`` model, caching the text.

    The transcript is stored as ``transcript.txt`` next to the audio file and
    is read back from there on later calls instead of calling the API again.

    Args:
        audio_file_path: Path (or string) of the audio file to transcribe.

    Returns:
        Tuple ``(transcription, transcript_path)``.
    """
    transcript_path = Path(audio_file_path).parent / 'transcript.txt'
    if transcript_path.exists():
        print('Transcript already exists. Skipping transcription.')
        # Explicit UTF-8: transcripts may contain non-ASCII characters and the
        # platform default encoding is not guaranteed to handle them.
        return transcript_path.read_text(encoding='utf-8'), transcript_path

    print('Sending for transcription...', flush=True)
    with open(audio_file_path, 'rb') as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="text",
        )
    transcript_path.write_text(transcription, encoding='utf-8')
    print('Transcription done', flush=True)
    return transcription, transcript_path

def format_recipe(transcript, output_path, tiktok_url):
    """Turn a recipe transcript into a formatted recipe via the chat API.

    The result is cached in ``formatted_recipe.txt`` inside *output_path*; when
    that file exists it is returned as-is and no API call is made.

    Args:
        transcript: Raw transcript text of the recipe video.
        output_path: Directory in which the formatted recipe is stored.
        tiktok_url: Source URL appended to the end of the recipe text.

    Returns:
        Tuple ``(formatted_recipe, formatted_recipe_path)``.
    """
    formatted_recipe_path = Path(output_path) / 'formatted_recipe.txt'
    if formatted_recipe_path.exists():
        print('Formatted recipe already exists. Skipping formatting.')
        return (
            formatted_recipe_path.read_text(encoding='utf-8'),
            formatted_recipe_path,
        )

    system_prompt = "You are a helpful assistant that turns transcripts of TikTok recipe videos into nicely formatted recipes. Please output the recipe only and no additional text or comentary. Each recipe should have exactly three sections: Title, Ingredients, and Instructions. Make sure to write every step and ingredient and if you're not sure about something make sure to write a note in parentheses explaining why you are unsure and how you guessed, prepend '≈' to the amount, and make the best estimation you can given the context. You may also optionally add a note at the end of the recipe."
    prompt = f"Please format this recipe transcript into a nicely formatted recipe:\n\n{transcript}"

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        max_tokens=500,
        n=1,
        stop=None,
        temperature=0.7,
    )

    formatted_recipe = (response.choices[0].message.content or '').strip()
    # Remove a leading "Title:" label only when it is really there.
    # str.lstrip('Title:') would strip any run of the characters T, i, t, l,
    # e and ':' and so eat the start of titles such as "Tiramisu".
    title_label = 'Title:'
    if formatted_recipe.startswith(title_label):
        formatted_recipe = formatted_recipe[len(title_label):].strip()

    formatted_recipe = f'{formatted_recipe}\n\n\n{tiktok_url}'

    # Explicit UTF-8: the prompt asks for '≈', which not every platform
    # default encoding can represent.
    formatted_recipe_path.write_text(formatted_recipe, encoding='utf-8')
    return formatted_recipe, formatted_recipe_path





def expand_url(short_url):
    """Resolve one redirect hop of a (possibly shortened) URL.

    A single GET is issued without following redirects. If the server answers
    with a redirect status and a ``Location`` header, that target is returned;
    otherwise *short_url* is returned unchanged.

    Args:
        short_url: URL to expand.

    Returns:
        The redirect target as an absolute URL, or *short_url* when there is
        no redirect.

    Raises:
        requests.exceptions.RequestException: On network errors or timeout.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    # A timeout prevents the script from hanging forever on a stalled server.
    response = requests.get(short_url, allow_redirects=False, timeout=10)
    location = response.headers.get('Location')
    if response.status_code in (301, 302, 303, 307, 308) and location:
        # Location may be relative; resolve it against the requested URL.
        return urljoin(short_url, location)
    return short_url

def extract_recipe(tiktok_url):
    """Run the full pipeline: download, extract audio, transcribe, format.

    All intermediate files are stored in a directory named after the video id.

    Args:
        tiktok_url: TikTok video URL; share links are expanded first via
            ``expand_url``.

    Returns:
        Tuple ``(formatted_recipe, formatted_recipe_path, transcript_path)``.

    Raises:
        ValueError: If the expanded URL does not have the shape
            ``<host>/<user>/<kind>/<video_id>``.
    """
    tiktok_url = expand_url(tiktok_url)
    tiktok_url_full = tiktok_url.strip().split("?")[0].strip()

    # Remove scheme and "www." as real prefixes. str.lstrip() treats its
    # argument as a set of characters, so "https://tiktok.com" would lose the
    # leading "t" of the host as well.
    tiktok_url_clean = tiktok_url_full.rstrip('/')
    for prefix in ('https://', 'http://', 'www.'):
        if tiktok_url_clean.startswith(prefix):
            tiktok_url_clean = tiktok_url_clean[len(prefix):]

    parts = tiktok_url_clean.split("/")
    if len(parts) != 4 or not parts[3]:
        raise ValueError(
            f'Unexpected TikTok URL format: {tiktok_url_full!r} '
            '(expected <host>/<user>/<kind>/<video_id>)'
        )
    video_id = parts[3]

    output_path = Path(video_id)
    output_path.mkdir(parents=True, exist_ok=True)

    video_path = download_video(tiktok_url_full, video_id, output_path)
    audio_path = extract_audio(video_path)
    transcript, transcript_path = transcribe_audio(audio_path)
    formatted_recipe, formatted_recipe_path = format_recipe(transcript, output_path, tiktok_url_clean)

    return formatted_recipe, formatted_recipe_path, transcript_path

def main():
    """Extract the recipe from a hard-coded TikTok link and print it."""
    # Long-form URL of the kind the pipeline also accepts:
    #   "https://www.tiktok.com/@emmaaaaaaam_/video/7348493781961886981"
    share_link = "https://www.tiktok.com/t/ZTLjYBSpt/"
    recipe_text, _recipe_path, _transcript_path = extract_recipe(share_link)

    print("Formatted Recipe:", recipe_text, sep="\n")


if __name__ == "__main__":
    main()