
Co-creating how we evaluate AI in health contexts

landing.hero.subtitle
landing.hero.collaborating_with






landing.why.subtitle
landing.why.card1_desc
landing.why.card2_desc
landing.why.card3_desc
landing.services.subtitle
landing.services.bias_desc

landing.services.rlhf_desc

landing.services.qa_desc

landing.services.asr_desc

landing.expertise.subtitle
landing.expertise.health_desc
landing.expertise.finance_desc
landing.expertise.culture_desc
landing.datasets.subtitle

landing.datasets.afristereo_desc
landing.datasets.health_qa_desc
landing.datasets.view_details
landing.datasets.finance_qa_desc
landing.datasets.coming_soon
landing.datasets.hausa_desc
landing.datasets.view_details

landing.leaderboard.bpr_desc
landing.leaderboard.bpr_note
| landing.leaderboard.col_model | landing.leaderboard.col_bpr | ||
|---|---|---|---|
| landing.leaderboard.era_modern | |||
| Llama 3.2 3B | 0.78 | ||
| Mistral 7B | 0.75 | ||
| Qwen 2.5 7B | 0.71 | ||
| Gemma 2 2B | 0.71 | ||
| Phi-3 Mini | 0.70 | ||
| landing.leaderboard.era_baseline | |||
| GPT-Neo | 0.71 | ||
| GPT-2 Large | 0.69 | ||
| FinBERT | 0.50 | ||
landing.leaderboard.paper1_author
landing.leaderboard.paper1_desc
landing.leaderboard.paper2_author
landing.leaderboard.paper2_desc
landing.leaderboard.test_models_desc
landing.leaderboard.start_project



landing.community.subtitle
landing.community.earn_desc
landing.community.flexible_desc
landing.community.protect_desc
landing.community.priority_regions
landing.community.mission_desc
landing.about.desc_1
landing.about.desc_2
landing.about.powered_desc_1 landing.about.powered_desc_link landing.about.powered_desc_2


landing.journal.subtitle



