Skip to main content

Overview

Build a model that extracts structured data from YouTube videos:
  • Process YouTube videos to generate synthetic training data
  • Use question/answer templates to extract JSON output
  • Extract stock tickers, investment tips, risks, and topics
  • Create snapshots, get recommendations, and launch fine-tuning
Export your Prem API key as API_KEY before running any script. The example uses 5 curated financial YouTube videos. You can modify the YOUTUBE_URLS array to use your own videos.
1

Set YouTube URLs

// Prem API key, read from the API_KEY environment variable.
const { API_KEY } = process.env;

// YouTube videos to analyze; replace these entries to use your own videos.
const YOUTUBE_URLS: string[] = [
  'https://www.youtube.com/watch?v=JH-k5f4Yclc',
  'https://www.youtube.com/watch?v=YEWhxcpMS1c',
  'https://www.youtube.com/watch?v=cb8up3HVXis',
  'https://www.youtube.com/watch?v=26xatIiMv88',
  'https://www.youtube.com/watch?v=-Da3gUdzCvs',
];
2

Generate dataset from YouTube

Create a project and generate synthetic Q&A pairs from YouTube videos. See Create Project and Create Synthetic Dataset for details.
// --- Create the project that the dataset will belong to ---
const projectPayload = {
  name: 'Stock Analysis Project',
  goal: 'Extract investment insights from financial videos'
};
const projectResponse = await fetch('https://studio.premai.io/api/v1/public/projects/create', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${API_KEY}`,
    'Content-Type': 'application/json'
  },
  body: JSON.stringify(projectPayload)
});
if (!projectResponse.ok) {
  throw new Error(`${projectResponse.status}: ${await projectResponse.text()}`);
}
const { project_id } = await projectResponse.json();

// --- Assemble the form fields for the synthetic-dataset request ---
const form = new FormData();
form.append('project_id', project_id);
form.append('name', 'Financial YouTube Dataset');

// One indexed field per video: youtube_urls[0], youtube_urls[1], ...
for (const [position, videoUrl] of YOUTUBE_URLS.entries()) {
  form.append(`youtube_urls[${position}]`, videoUrl);
}

// Generation settings (form values are sent as strings)
form.append('pairs_to_generate', '50');
form.append('pair_type', 'qa');
form.append('temperature', '0.3');

// Rules and constraints, each sent as a separate rules[] entry in this order
const generationRules = [
  'stocks_mentioned: List all stock ticker symbols mentioned (e.g., AAPL, TSLA, NVDA)',
  'investment_tips: Extract 3-5 specific, actionable pieces of advice from the video',
  'risks_mentioned: List any warnings, risks, or cautionary statements discussed',
  'video_topic: Write a short phrase describing the main topic of the video',
  'Only output valid JSON with no additional text before or after',
  'If a field has no relevant information, use an empty array [] or empty string ""',
  'Use exact quotes or close paraphrases from the video content',
  'Do not invent or infer information not explicitly stated'
];
for (const rule of generationRules) {
  form.append('rules[]', rule);
}

// Template for the generated questions
const questionTemplate = `Extract investment information from the following video transcript:

{VIDEO_TRANSCRIPT}

Provide the output in this JSON format:
{
"stocks_mentioned": ["TICKER1", "TICKER2"],
"investment_tips": ["tip 1", "tip 2", "tip 3"],
"risks_mentioned": ["risk 1", "risk 2"],
"video_topic": "brief description of main topic"
}`;
form.append('question_format', questionTemplate);

// Template for the generated answers
const answerTemplate = `{
"stocks_mentioned": ["<TICKER_SYMBOL_1>", "<TICKER_SYMBOL_2>"],
"investment_tips": ["<specific_tip_1>", "<specific_tip_2>", "<specific_tip_3>"],
"risks_mentioned": ["<risk_or_warning_1>", "<risk_or_warning_2>"],
"video_topic": "<main_topic_of_video>"
}`;
form.append('answer_format', answerTemplate);

// --- Submit the form; only the Authorization header is set on this request ---
const datasetResponse = await fetch('https://studio.premai.io/api/v1/public/datasets/create-synthetic', {
  method: 'POST',
  headers: { 'Authorization': `Bearer ${API_KEY}` },
  body: form
});
if (!datasetResponse.ok) {
  throw new Error(`${datasetResponse.status}: ${await datasetResponse.text()}`);
}
const { dataset_id } = await datasetResponse.json();
Generation can take 5-10 minutes.
3

Wait for generation

Poll the dataset status until generation completes. The sleep helper used in the polling loops is defined in the Full Example below. See Get Dataset for details.
// Re-fetch the dataset every 5 seconds until its status is no longer 'processing'.
// The status line is logged on the first check and on every sixth check after it.
let dataset;
let checks = 0;
while (true) {
  await sleep(5000);
  const statusResponse = await fetch(`https://studio.premai.io/api/v1/public/datasets/${dataset_id}`, {
    headers: { 'Authorization': `Bearer ${API_KEY}` }
  });
  if (!statusResponse.ok) {
    throw new Error(`${statusResponse.status}: ${await statusResponse.text()}`);
  }
  dataset = await statusResponse.json();

  const shouldLog = checks % 6 === 0;
  checks += 1;
  if (shouldLog) {
    console.log(`Status: ${dataset.status}, ${dataset.datapoints_count} datapoints`);
  }

  if (dataset.status !== 'processing') break;
}
4

Create snapshot and get recommendations

Create a snapshot and generate model recommendations. See Create Snapshot, Generate Recommendations, and Get Recommendations for details.
// --- Create a snapshot of the dataset (split_percentage of 80) ---
const snapshotResponse = await fetch('https://studio.premai.io/api/v1/public/snapshots/create', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${API_KEY}`,
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({ dataset_id, split_percentage: 80 })
});
if (!snapshotResponse.ok) {
  throw new Error(`${snapshotResponse.status}: ${await snapshotResponse.text()}`);
}
const { snapshot_id } = await snapshotResponse.json();

// --- Request recommendations for the snapshot; only the response status is checked ---
const generateResponse = await fetch('https://studio.premai.io/api/v1/public/recommendations/generate', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${API_KEY}`,
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({ snapshot_id })
});
if (!generateResponse.ok) {
  throw new Error(`${generateResponse.status}: ${await generateResponse.text()}`);
}

// --- Fetch the recommendations every 5 seconds until they are no longer 'processing' ---
let recs;
while (true) {
  await sleep(5000);
  const recsResponse = await fetch(`https://studio.premai.io/api/v1/public/recommendations/${snapshot_id}`, {
    headers: { 'Authorization': `Bearer ${API_KEY}` }
  });
  if (!recsResponse.ok) {
    throw new Error(`${recsResponse.status}: ${await recsResponse.text()}`);
  }
  recs = await recsResponse.json();
  if (recs.status !== 'processing') break;
}
5

Start fine-tuning

Launch a fine-tuning job with recommended experiments. See Create Fine-Tuning Job for details.
// Keep only the experiments flagged as recommended, and strip the
// `recommended` / `reason_for_recommendation` fields so that only the
// remaining experiment settings are sent with the job request.
const experiments = recs.recommended_experiments
  .filter((e: any) => e.recommended)
  .map(({ recommended, reason_for_recommendation, ...experiment }: any) => experiment);

// Fail fast instead of submitting a job with an empty experiments list.
if (experiments.length === 0) {
  throw new Error('No recommended experiments found. Cannot create finetuning job.');
}

// Create the fine-tuning job for the snapshot with the selected experiments.
const res = await fetch('https://studio.premai.io/api/v1/public/finetuning/create', {
  method: 'POST',
  headers: {
    'Authorization': `Bearer ${API_KEY}`,
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({ snapshot_id, name: 'YouTube Model', experiments })
});
if (!res.ok) throw new Error(`${res.status}: ${await res.text()}`);
const { job_id } = await res.json();
6

Monitor job

// Check the job every 10 seconds, at most 30 times (about 5 minutes),
// and stop as soon as its status is no longer 'processing'.
for (let remaining = 30; remaining > 0; remaining--) {
  await sleep(10000);

  const jobResponse = await fetch(`https://studio.premai.io/api/v1/public/finetuning/${job_id}`, {
    headers: { 'Authorization': `Bearer ${API_KEY}` }
  });
  if (!jobResponse.ok) {
    throw new Error(`${jobResponse.status}: ${await jobResponse.text()}`);
  }
  const job = await jobResponse.json();

  // Print the overall status followed by one line per experiment.
  console.log(`Status: ${job.status}`);
  for (const exp of job.experiments) {
    console.log(`  - Exp #${exp.experiment_number}: ${exp.status} ${exp.model_id || ''}`);
  }

  if (job.status !== 'processing') break;
}
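Monitor fine-tuning job progress and status: the loop above checks the job every 10 seconds, at most 30 times (about 5 minutes), and stops early once the status is no longer processing. See Get Fine-Tuning Job for details.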

Full Example

#!/usr/bin/env bun

/**
 * Example 2: YouTube synthetic dataset workflow
 * 1. Create project → 2. Generate synthetic data from YouTube → 3. Create snapshot → 4. Get recommendations → 5. Run finetuning → 6. Monitor job
 */

// Prem API key, read from the API_KEY environment variable.
const { API_KEY } = process.env;

// YouTube videos to process; replace these entries to use your own videos.
const YOUTUBE_URLS: string[] = [
	'https://www.youtube.com/watch?v=JH-k5f4Yclc',
	'https://www.youtube.com/watch?v=YEWhxcpMS1c',
	'https://www.youtube.com/watch?v=cb8up3HVXis',
	'https://www.youtube.com/watch?v=26xatIiMv88',
	'https://www.youtube.com/watch?v=-Da3gUdzCvs',
];

// Abort with exit status 1 when no API key is configured.
if (!API_KEY) {
	const guidance = [
		'Error: API_KEY environment variable is required',
		'Please create a .env file based on .env.example',
	];
	for (const line of guidance) {
		console.error(line);
	}
	process.exit(1);
}

/**
 * Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires.
 *
 * @param ms Delay passed to `setTimeout`, in milliseconds.
 */
function sleep(ms: number) {
	return new Promise((resolve) => {
		setTimeout(resolve, ms);
	});
}

/**
 * Runs the end-to-end YouTube synthetic-data workflow against the Prem API
 * (studio.premai.io): create a project, generate a synthetic Q&A dataset from
 * YOUTUBE_URLS, wait for generation, create a snapshot, generate and fetch
 * recommendations, create a fine-tuning job from the recommended experiments,
 * then monitor the job for up to 5 minutes.
 *
 * @throws Error when a request returns a non-OK response (message is
 *   `<status>: <response body>`) or when no experiment is marked as recommended.
 */
async function main(): Promise<void> {
	console.log('\n=== YouTube Synthetic Workflow ===\n');

	// 1. Create project
	console.log('1. Creating project...');
	const res1 = await fetch('https://studio.premai.io/api/v1/public/projects/create', {
		method: 'POST',
		headers: {
			'Authorization': `Bearer ${API_KEY}`,
			'Content-Type': 'application/json'
		},
		body: JSON.stringify({ name: 'Stock Analysis Project', goal: 'Extract investment insights from financial videos' }),
	});
	if (!res1.ok) throw new Error(`${res1.status}: ${await res1.text()}`);
	const { project_id } = await res1.json();
	console.log(`   ✓ Project: ${project_id}\n`);

	// 2. Generate synthetic dataset
	console.log('2. Generating synthetic dataset from YouTube...');
	console.log(`   URLs: ${YOUTUBE_URLS.length} financial videos`);
	const formData = new FormData();
	formData.append('project_id', project_id);
	formData.append('name', 'Financial YouTube Dataset');

	// Add multiple YouTube URLs as indexed fields: youtube_urls[0], youtube_urls[1], ...
	YOUTUBE_URLS.forEach((url, index) => {
		formData.append(`youtube_urls[${index}]`, url);
	});

	formData.append('pairs_to_generate', '50');
	formData.append('pair_type', 'qa');
	formData.append('temperature', '0.3');

	// Add rules and constraints; each one is sent as a separate rules[] entry, in this order
	const rules = [
		'stocks_mentioned: List all stock ticker symbols mentioned (e.g., AAPL, TSLA, NVDA)',
		'investment_tips: Extract 3-5 specific, actionable pieces of advice from the video',
		'risks_mentioned: List any warnings, risks, or cautionary statements discussed',
		'video_topic: Write a short phrase describing the main topic of the video',
		'Only output valid JSON with no additional text before or after',
		'If a field has no relevant information, use an empty array [] or empty string ""',
		'Use exact quotes or close paraphrases from the video content',
		'Do not invent or infer information not explicitly stated',
	];
	for (const rule of rules) {
		formData.append('rules[]', rule);
	}

	// Define question format
	const questionFormat = `Extract investment information from the following video transcript:

{VIDEO_TRANSCRIPT}

Provide the output in this JSON format:
{
  "stocks_mentioned": ["TICKER1", "TICKER2"],
  "investment_tips": ["tip 1", "tip 2", "tip 3"],
  "risks_mentioned": ["risk 1", "risk 2"],
  "video_topic": "brief description of main topic"
}`;
	formData.append('question_format', questionFormat);

	// Define answer format
	const answerFormat = `{
  "stocks_mentioned": ["<TICKER_SYMBOL_1>", "<TICKER_SYMBOL_2>"],
  "investment_tips": ["<specific_tip_1>", "<specific_tip_2>", "<specific_tip_3>"],
  "risks_mentioned": ["<risk_or_warning_1>", "<risk_or_warning_2>"],
  "video_topic": "<main_topic_of_video>"
}`;
	formData.append('answer_format', answerFormat);

	// Only the Authorization header is set on the form request.
	const res2 = await fetch('https://studio.premai.io/api/v1/public/datasets/create-synthetic', {
		method: 'POST',
		headers: { 'Authorization': `Bearer ${API_KEY}` },
		body: formData,
	});
	if (!res2.ok) throw new Error(`${res2.status}: ${await res2.text()}`);
	const { dataset_id } = await res2.json();
	console.log(`   ✓ Dataset: ${dataset_id}`);

	// Wait for dataset (can take several minutes): poll every 5 seconds and
	// log the status on the first check and on every sixth check after it.
	// NOTE(review): any status other than 'processing' is treated as finished,
	// including possible failure states — confirm the API's terminal statuses.
	console.log('   Waiting for generation (may take 5-10 minutes)...');
	let dataset;
	let checks = 0;
	do {
		await sleep(5000);
		const res = await fetch(`https://studio.premai.io/api/v1/public/datasets/${dataset_id}`, {
			headers: { 'Authorization': `Bearer ${API_KEY}` }
		});
		if (!res.ok) throw new Error(`${res.status}: ${await res.text()}`);
		dataset = await res.json();
		if (checks++ % 6 === 0) {
			console.log(`   Status: ${dataset.status}, ${dataset.datapoints_count} datapoints`);
		}
	} while (dataset.status === 'processing');
	console.log(`   ✓ Ready: ${dataset.datapoints_count} datapoints\n`);

	// 3. Create snapshot
	console.log('3. Creating snapshot...');
	const res3 = await fetch('https://studio.premai.io/api/v1/public/snapshots/create', {
		method: 'POST',
		headers: {
			'Authorization': `Bearer ${API_KEY}`,
			'Content-Type': 'application/json'
		},
		body: JSON.stringify({ dataset_id, split_percentage: 80 }),
	});
	if (!res3.ok) throw new Error(`${res3.status}: ${await res3.text()}`);
	const { snapshot_id } = await res3.json();
	console.log(`   ✓ Snapshot: ${snapshot_id}\n`);

	// 4. Generate recommendations
	console.log('4. Generating recommendations...');
	const res4 = await fetch('https://studio.premai.io/api/v1/public/recommendations/generate', {
		method: 'POST',
		headers: {
			'Authorization': `Bearer ${API_KEY}`,
			'Content-Type': 'application/json'
		},
		body: JSON.stringify({ snapshot_id }),
	});
	if (!res4.ok) throw new Error(`${res4.status}: ${await res4.text()}`);

	// Poll every 5 seconds until the recommendations are no longer 'processing'.
	let recs;
	do {
		await sleep(5000);
		const res = await fetch(`https://studio.premai.io/api/v1/public/recommendations/${snapshot_id}`, {
			headers: { 'Authorization': `Bearer ${API_KEY}` }
		});
		if (!res.ok) throw new Error(`${res.status}: ${await res.text()}`);
		recs = await res.json();
	} while (recs.status === 'processing');

	// Select the recommended experiments once; the list is used both for the
	// summary below and for the fine-tuning request in step 5.
	const recommendedExperiments = recs.recommended_experiments.filter((e: any) => e.recommended);

	console.log(`   ✓ Recommended experiments:`);
	console.log(`   Total experiments: ${recs.recommended_experiments.length}, Recommended: ${recommendedExperiments.length}`);
	recommendedExperiments.forEach((e: any) => {
		console.log(`     - ${e.base_model_id} (LoRA: ${e.lora})`);
	});
	console.log();

	// 5. Create finetuning job
	console.log('5. Creating finetuning job...');
	// Strip the `recommended` / `reason_for_recommendation` fields so that only
	// the remaining experiment settings are sent with the job request.
	const experiments = recommendedExperiments
		.map(({ recommended, reason_for_recommendation, ...experiment }: any) => experiment);

	if (experiments.length === 0) {
		// Thrown like every other failure so the caller's rejection handler
		// reports it, rather than exiting the process from inside this function.
		throw new Error('No recommended experiments found. Cannot create finetuning job.');
	}

	const res5 = await fetch('https://studio.premai.io/api/v1/public/finetuning/create', {
		method: 'POST',
		headers: {
			'Authorization': `Bearer ${API_KEY}`,
			'Content-Type': 'application/json'
		},
		body: JSON.stringify({ snapshot_id, name: 'YouTube Model', experiments }),
	});
	if (!res5.ok) throw new Error(`${res5.status}: ${await res5.text()}`);
	const { job_id } = await res5.json();
	console.log(`   ✓ Job: ${job_id}\n`);

	// 6. Monitor (5 minutes max): 30 checks, 10 seconds apart
	console.log('6. Monitoring job...');
	// Last status seen, kept outside the loop so a timeout can be told apart
	// from a job that actually left the 'processing' state.
	let lastStatus = 'processing';
	for (let i = 0; i < 30; i++) {
		await sleep(10000);
		const res = await fetch(`https://studio.premai.io/api/v1/public/finetuning/${job_id}`, {
			headers: { 'Authorization': `Bearer ${API_KEY}` }
		});
		if (!res.ok) throw new Error(`${res.status}: ${await res.text()}`);
		const job = await res.json();
		console.log(`   Status: ${job.status}`);
		job.experiments.forEach((e: any) => {
			console.log(`     - Exp #${e.experiment_number}: ${e.status} ${e.model_id || ''}`);
		});
		lastStatus = job.status;
		if (lastStatus !== 'processing') break;
	}

	if (lastStatus === 'processing') {
		// The monitoring window elapsed while the job was still running; say so
		// instead of reporting completion.
		console.log(`\n… Monitoring stopped after 5 minutes; job ${job_id} is still processing.\n`);
	} else {
		console.log('\n✓ Done!\n');
	}
}

// Entry point: run the workflow; on rejection, print the message and exit with status 1.
main().catch((failure) => {
	console.error('\n✗ Error:', failure.message);
	process.exit(1);
});