Overview
Build a model that extracts structured data from YouTube videos:
- Process YouTube videos to generate synthetic training data
- Use question/answer templates to extract JSON output
- Extract stock tickers, investment tips, risks, and topics
- Create snapshots, get recommendations, and launch fine-tuning
Export your Prem API key as the `API_KEY` environment variable before running any script.
The example uses 5 curated financial YouTube videos. You can modify the `YOUTUBE_URLS` array to use your own videos.
1
Set YouTube URLs
Copy
Ask AI
// Prem API key, taken from the API_KEY environment variable.
const API_KEY = process.env.API_KEY;

// Source videos for the synthetic dataset; replace these with your own URLs.
const YOUTUBE_URLS = [
  'https://www.youtube.com/watch?v=JH-k5f4Yclc',
  'https://www.youtube.com/watch?v=YEWhxcpMS1c',
  'https://www.youtube.com/watch?v=cb8up3HVXis',
  'https://www.youtube.com/watch?v=26xatIiMv88',
  'https://www.youtube.com/watch?v=-Da3gUdzCvs',
];
2
Generate dataset from YouTube
Create a project and generate synthetic Q&A pairs from YouTube videos. See Create Project and Create Synthetic Dataset for details.
Copy
Ask AI
// Bearer-token header shared by both requests in this step.
const authHeader = { 'Authorization': `Bearer ${API_KEY}` };

// Create the project that the generated dataset will be attached to.
const projectPayload = {
  name: 'Stock Analysis Project',
  goal: 'Extract investment insights from financial videos'
};
const res = await fetch('https://studio.premai.io/api/v1/public/projects/create', {
  method: 'POST',
  headers: { ...authHeader, 'Content-Type': 'application/json' },
  body: JSON.stringify(projectPayload)
});
if (!res.ok) {
  const detail = await res.text();
  throw new Error(`${res.status}: ${detail}`);
}
const { project_id } = await res.json();

// Assemble the multipart form describing the synthetic dataset.
const formData = new FormData();
formData.append('project_id', project_id);
formData.append('name', 'Financial YouTube Dataset');

// One indexed form field per source video.
for (const [index, url] of YOUTUBE_URLS.entries()) {
  formData.append(`youtube_urls[${index}]`, url);
}

formData.append('pairs_to_generate', '50');
formData.append('pair_type', 'qa');
formData.append('temperature', '0.3');

// Rules and constraints: the first four describe the output fields,
// the remaining four restrict how the output may be written.
const rules = [
  'stocks_mentioned: List all stock ticker symbols mentioned (e.g., AAPL, TSLA, NVDA)',
  'investment_tips: Extract 3-5 specific, actionable pieces of advice from the video',
  'risks_mentioned: List any warnings, risks, or cautionary statements discussed',
  'video_topic: Write a short phrase describing the main topic of the video',
  'Only output valid JSON with no additional text before or after',
  'If a field has no relevant information, use an empty array [] or empty string ""',
  'Use exact quotes or close paraphrases from the video content',
  'Do not invent or infer information not explicitly stated'
];
for (const rule of rules) {
  formData.append('rules[]', rule);
}

// Template for the question half of each generated pair.
const questionFormat = `Extract investment information from the following video transcript:
{VIDEO_TRANSCRIPT}
Provide the output in this JSON format:
{
"stocks_mentioned": ["TICKER1", "TICKER2"],
"investment_tips": ["tip 1", "tip 2", "tip 3"],
"risks_mentioned": ["risk 1", "risk 2"],
"video_topic": "brief description of main topic"
}`;
formData.append('question_format', questionFormat);

// Template for the answer half of each generated pair.
const answerFormat = `{
"stocks_mentioned": ["<TICKER_SYMBOL_1>", "<TICKER_SYMBOL_2>"],
"investment_tips": ["<specific_tip_1>", "<specific_tip_2>", "<specific_tip_3>"],
"risks_mentioned": ["<risk_or_warning_1>", "<risk_or_warning_2>"],
"video_topic": "<main_topic_of_video>"
}`;
formData.append('answer_format', answerFormat);

// Submit the form; only the Authorization header is set explicitly here.
const res2 = await fetch('https://studio.premai.io/api/v1/public/datasets/create-synthetic', {
  method: 'POST',
  headers: authHeader,
  body: formData
});
if (!res2.ok) {
  const detail = await res2.text();
  throw new Error(`${res2.status}: ${detail}`);
}
const { dataset_id } = await res2.json();
3
Wait for generation
Poll the dataset status every 5 seconds until generation completes. See Get Dataset for details. The `sleep` helper used here is defined in the Full Example below.
Copy
Ask AI
// Latest dataset payload returned by the API; later steps read it after the loop.
let dataset;
// Number of completed polls, used only to thin out the log output.
let checks = 0;

// Wait 5 seconds, fetch the dataset, and repeat while it reports 'processing'.
while (true) {
  await sleep(5000);

  const res = await fetch(`https://studio.premai.io/api/v1/public/datasets/${dataset_id}`, {
    headers: { 'Authorization': `Bearer ${API_KEY}` }
  });
  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`${res.status}: ${detail}`);
  }
  dataset = await res.json();

  // Log on the first poll and on every sixth poll after that.
  const shouldLog = checks % 6 === 0;
  checks += 1;
  if (shouldLog) {
    console.log(`Status: ${dataset.status}, ${dataset.datapoints_count} datapoints`);
  }

  if (dataset.status !== 'processing') break;
}
4
Create snapshot and get recommendations
Create a snapshot and generate model recommendations. See Create Snapshot, Generate Recommendations, and Get Recommendations for details.
Copy
Ask AI
// Headers shared by the two JSON POST requests in this step.
const jsonHeaders = {
  'Authorization': `Bearer ${API_KEY}`,
  'Content-Type': 'application/json'
};

// Create a snapshot of the dataset, passing split_percentage: 80.
const res = await fetch('https://studio.premai.io/api/v1/public/snapshots/create', {
  method: 'POST',
  headers: jsonHeaders,
  body: JSON.stringify({ dataset_id, split_percentage: 80 })
});
if (!res.ok) {
  const detail = await res.text();
  throw new Error(`${res.status}: ${detail}`);
}
const { snapshot_id } = await res.json();

// Kick off recommendation generation; the response body is not needed.
const res2 = await fetch('https://studio.premai.io/api/v1/public/recommendations/generate', {
  method: 'POST',
  headers: jsonHeaders,
  body: JSON.stringify({ snapshot_id })
});
if (!res2.ok) {
  const detail = await res2.text();
  throw new Error(`${res2.status}: ${detail}`);
}

// Poll every 5 seconds while the recommendations still report 'processing'.
let recs;
while (true) {
  await sleep(5000);
  const res3 = await fetch(`https://studio.premai.io/api/v1/public/recommendations/${snapshot_id}`, {
    headers: { 'Authorization': `Bearer ${API_KEY}` }
  });
  if (!res3.ok) {
    const detail = await res3.text();
    throw new Error(`${res3.status}: ${detail}`);
  }
  recs = await res3.json();
  if (recs.status !== 'processing') break;
}
5
Start fine-tuning
Launch a fine-tuning job with recommended experiments. See Create Fine-Tuning Job for details.
Copy
Ask AI
// Keep only the experiments flagged as recommended, and strip the two
// recommendation-only fields so they are not sent in the job payload.
const experiments = recs.recommended_experiments
  .filter((e: any) => e.recommended)
  .map(({ recommended, reason_for_recommendation, ...experiment }: any) => experiment);

// With no recommended experiment there is nothing to train, so stop here
// instead of posting an empty experiment list.
if (experiments.length === 0) {
  throw new Error('No recommended experiments found. Cannot create finetuning job.');
}

// Create the fine-tuning job for the snapshot with the selected experiments.
const res = await fetch('https://studio.premai.io/api/v1/public/finetuning/create', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${API_KEY}`,
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({ snapshot_id, name: 'YouTube Model', experiments })
});
if (!res.ok) throw new Error(`${res.status}: ${await res.text()}`);
const { job_id } = await res.json();
6
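Monitor job
Poll the fine-tuning job every 10 seconds, for at most 30 checks (about 5 minutes), printing the job status and each experiment's status until the job is no longer 'processing'.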
Copy
Ask AI
// At most 30 polls, each preceded by a 10-second wait.
for (let remaining = 30; remaining > 0; remaining--) {
  await sleep(10000);

  const res = await fetch(`https://studio.premai.io/api/v1/public/finetuning/${job_id}`, {
    headers: { 'Authorization': `Bearer ${API_KEY}` }
  });
  if (!res.ok) {
    const detail = await res.text();
    throw new Error(`${res.status}: ${detail}`);
  }
  const job = await res.json();

  // Print the overall job status, then one line per experiment.
  console.log(`Status: ${job.status}`);
  for (const e of job.experiments) {
    console.log(` - Exp #${e.experiment_number}: ${e.status} ${e.model_id || ''}`);
  }

  // Stop as soon as the job is no longer 'processing'.
  if (job.status !== 'processing') break;
}
Full Example
Copy
Ask AI
#!/usr/bin/env bun
/**
* Example 2: YouTube synthetic dataset workflow
* 1. Create project → 2. Generate synthetic data from YouTube → 3. Create snapshot → 4. Get recommendations → 5. Run finetuning
*/
// Prem API key, taken from the API_KEY environment variable.
const API_KEY = process.env.API_KEY;

// Financial videos the synthetic dataset is generated from.
const YOUTUBE_URLS = [
  'https://www.youtube.com/watch?v=JH-k5f4Yclc',
  'https://www.youtube.com/watch?v=YEWhxcpMS1c',
  'https://www.youtube.com/watch?v=cb8up3HVXis',
  'https://www.youtube.com/watch?v=26xatIiMv88',
  'https://www.youtube.com/watch?v=-Da3gUdzCvs',
];
// Fail fast when no API key is configured: every request in this script sends it.
if (!API_KEY) {
  const messages = [
    'Error: API_KEY environment variable is required',
    'Please create a .env file based on .env.example',
  ];
  for (const message of messages) {
    console.error(message);
  }
  process.exit(1);
}
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms - Delay passed to setTimeout.
 * @returns A promise that resolves (with no value) when the timer fires.
 */
function sleep(ms: number) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
/** Base URL shared by every API call made by this script. */
const API_BASE = 'https://studio.premai.io/api/v1/public';

/**
 * Send an authenticated request to the API and fail fast on a non-OK response.
 *
 * @param path - Endpoint path appended to API_BASE (starts with '/').
 * @param body - Omit for a GET. A FormData is POSTed as-is; any other value
 *   is POSTed as JSON.
 * @returns The successful Response with its body still unread.
 * @throws Error formatted as "<status>: <response text>" when the response is not OK.
 */
async function apiRequest(path: string, body?: unknown): Promise<Response> {
  const headers: Record<string, string> = { 'Authorization': `Bearer ${API_KEY}` };
  const init: RequestInit = { headers };
  if (body instanceof FormData) {
    // Content-Type is deliberately left unset for multipart bodies so that
    // fetch can generate it together with the boundary.
    init.method = 'POST';
    init.body = body;
  } else if (body !== undefined) {
    headers['Content-Type'] = 'application/json';
    init.method = 'POST';
    init.body = JSON.stringify(body);
  }
  const res = await fetch(`${API_BASE}${path}`, init);
  if (!res.ok) throw new Error(`${res.status}: ${await res.text()}`);
  return res;
}

/**
 * Poll a GET endpoint every 5 seconds until its `status` field is no longer
 * 'processing', then return the last parsed payload.
 *
 * NOTE(review): any status other than 'processing' ends the wait and is
 * treated as ready by the callers — confirm whether the API can report a
 * failure status that should abort the workflow instead.
 *
 * @param path - Endpoint path passed to apiRequest.
 * @param onCheck - Optional hook called after each poll with the parsed
 *   payload and the zero-based poll index (used for progress logging).
 * @returns The parsed JSON payload of the final poll (shape not declared here).
 */
async function pollWhileProcessing(
  path: string,
  onCheck?: (payload: any, check: number) => void,
): Promise<any> {
  let check = 0;
  for (;;) {
    await sleep(5000);
    const payload = await (await apiRequest(path)).json();
    onCheck?.(payload, check++);
    if (payload.status !== 'processing') return payload;
  }
}

/**
 * Run the whole workflow: create a project, generate a synthetic dataset from
 * YOUTUBE_URLS, snapshot it, fetch model recommendations, start a fine-tuning
 * job with the recommended experiments, and monitor that job for up to 30 polls.
 *
 * @throws Error when any API call returns a non-OK response, or when no
 *   experiment is recommended.
 */
async function main(): Promise<void> {
  console.log('\n=== YouTube Synthetic Workflow ===\n');

  // 1. Create project
  console.log('1. Creating project...');
  const projectRes = await apiRequest('/projects/create', {
    name: 'Stock Analysis Project',
    goal: 'Extract investment insights from financial videos',
  });
  const { project_id } = await projectRes.json();
  console.log(` ✓ Project: ${project_id}\n`);

  // 2. Generate synthetic dataset
  console.log('2. Generating synthetic dataset from YouTube...');
  console.log(` URLs: ${YOUTUBE_URLS.length} financial videos`);
  const formData = new FormData();
  formData.append('project_id', project_id);
  formData.append('name', 'Financial YouTube Dataset');

  // One indexed form field per source video.
  YOUTUBE_URLS.forEach((url, index) => {
    formData.append(`youtube_urls[${index}]`, url);
  });

  formData.append('pairs_to_generate', '50');
  formData.append('pair_type', 'qa');
  formData.append('temperature', '0.3');

  // Rules and constraints: the first four describe the output fields,
  // the remaining four restrict how the output may be written.
  const rules = [
    'stocks_mentioned: List all stock ticker symbols mentioned (e.g., AAPL, TSLA, NVDA)',
    'investment_tips: Extract 3-5 specific, actionable pieces of advice from the video',
    'risks_mentioned: List any warnings, risks, or cautionary statements discussed',
    'video_topic: Write a short phrase describing the main topic of the video',
    'Only output valid JSON with no additional text before or after',
    'If a field has no relevant information, use an empty array [] or empty string ""',
    'Use exact quotes or close paraphrases from the video content',
    'Do not invent or infer information not explicitly stated',
  ];
  for (const rule of rules) {
    formData.append('rules[]', rule);
  }

  // Define question format (template literal content is kept unindented so
  // the text sent to the API is unchanged).
  const questionFormat = `Extract investment information from the following video transcript:
{VIDEO_TRANSCRIPT}
Provide the output in this JSON format:
{
"stocks_mentioned": ["TICKER1", "TICKER2"],
"investment_tips": ["tip 1", "tip 2", "tip 3"],
"risks_mentioned": ["risk 1", "risk 2"],
"video_topic": "brief description of main topic"
}`;
  formData.append('question_format', questionFormat);

  // Define answer format
  const answerFormat = `{
"stocks_mentioned": ["<TICKER_SYMBOL_1>", "<TICKER_SYMBOL_2>"],
"investment_tips": ["<specific_tip_1>", "<specific_tip_2>", "<specific_tip_3>"],
"risks_mentioned": ["<risk_or_warning_1>", "<risk_or_warning_2>"],
"video_topic": "<main_topic_of_video>"
}`;
  formData.append('answer_format', answerFormat);

  const datasetRes = await apiRequest('/datasets/create-synthetic', formData);
  const { dataset_id } = await datasetRes.json();
  console.log(` ✓ Dataset: ${dataset_id}`);

  // Wait for dataset (can take several minutes); log the first poll and
  // every sixth poll after that.
  console.log(' Waiting for generation (may take 5-10 minutes)...');
  const dataset = await pollWhileProcessing(`/datasets/${dataset_id}`, (current, check) => {
    if (check % 6 === 0) {
      console.log(` Status: ${current.status}, ${current.datapoints_count} datapoints`);
    }
  });
  console.log(` ✓ Ready: ${dataset.datapoints_count} datapoints\n`);

  // 3. Create snapshot
  console.log('3. Creating snapshot...');
  const snapshotRes = await apiRequest('/snapshots/create', { dataset_id, split_percentage: 80 });
  const { snapshot_id } = await snapshotRes.json();
  console.log(` ✓ Snapshot: ${snapshot_id}\n`);

  // 4. Generate recommendations (the response body of the trigger call is not used)
  console.log('4. Generating recommendations...');
  await apiRequest('/recommendations/generate', { snapshot_id });
  const recs = await pollWhileProcessing(`/recommendations/${snapshot_id}`);

  console.log(` ✓ Recommended experiments:`);
  const recommendedExperiments = recs.recommended_experiments.filter((e: any) => e.recommended);
  console.log(` Total experiments: ${recs.recommended_experiments.length}, Recommended: ${recommendedExperiments.length}`);
  recommendedExperiments.forEach((e: any) => {
    console.log(` - ${e.base_model_id} (LoRA: ${e.lora})`);
  });
  console.log();

  // 5. Create finetuning job
  console.log('5. Creating finetuning job...');
  // Strip the two recommendation-only fields so they are not sent in the job payload.
  const experiments = recommendedExperiments.map(
    ({ recommended, reason_for_recommendation, ...experiment }: any) => experiment,
  );
  if (experiments.length === 0) {
    // Thrown rather than exiting here so this failure goes through the same
    // handler as every other error raised by main().
    throw new Error('No recommended experiments found. Cannot create finetuning job.');
  }
  const jobRes = await apiRequest('/finetuning/create', {
    snapshot_id,
    name: 'YouTube Model',
    experiments,
  });
  const { job_id } = await jobRes.json();
  console.log(` ✓ Job: ${job_id}\n`);

  // 6. Monitor (5 minutes max: 30 polls, 10 seconds apart)
  console.log('6. Monitoring job...');
  for (let i = 0; i < 30; i++) {
    await sleep(10000);
    const job = await (await apiRequest(`/finetuning/${job_id}`)).json();
    console.log(` Status: ${job.status}`);
    job.experiments.forEach((e: any) => {
      console.log(` - Exp #${e.experiment_number}: ${e.status} ${e.model_id || ''}`);
    });
    if (job.status !== 'processing') break;
  }
  console.log('\n✓ Done!\n');
}
// Entry point: run the workflow, print any failure, and exit with status 1.
main().catch((err: unknown) => {
  // A rejection is not guaranteed to be an Error, so fall back to its string
  // form instead of printing "undefined".
  const message = err instanceof Error ? err.message : String(err);
  console.error('\n✗ Error:', message);
  process.exit(1);
});