{
"01-ai/yi-1.5-34b-chat": {
"description": "Zero One Everything, the latest open-source fine-tuned model with 34 billion parameters, supports various dialogue scenarios with high-quality training data aligned with human preferences."
},
"01-ai/yi-1.5-9b-chat": {
"description": "Zero One Everything, the latest open-source fine-tuned model with 9 billion parameters, supports various dialogue scenarios with high-quality training data aligned with human preferences."
},
"360/deepseek-r1": {
"description": "[360 Deployment Version] DeepSeek-R1 extensively utilizes reinforcement learning techniques in the post-training phase, significantly enhancing model inference capabilities with minimal labeled data. It performs comparably to OpenAI's o1 official version in tasks such as mathematics, coding, and natural language reasoning."
},
"360gpt-pro": {
"description": "360GPT Pro, as an important member of the 360 AI model series, meets diverse natural language application scenarios with efficient text processing capabilities, supporting long text understanding and multi-turn dialogue."
},
"360gpt-pro-trans": {
"description": "A translation-specific model, finely tuned for optimal translation results."
},
"360gpt-turbo": {
"description": "360GPT Turbo offers powerful computation and dialogue capabilities, with excellent semantic understanding and generation efficiency, making it an ideal intelligent assistant solution for enterprises and developers."
},
"360gpt-turbo-responsibility-8k": {
"description": "360GPT Turbo Responsibility 8K emphasizes semantic safety and responsibility, designed specifically for applications with high content safety requirements, ensuring accuracy and robustness in user experience."
},
"360gpt2-o1": {
"description": "360gpt2-o1 builds a chain of thought using tree search and incorporates a reflection mechanism, trained with reinforcement learning, enabling the model to self-reflect and correct errors."
},
"360gpt2-pro": {
"description": "360GPT2 Pro is an advanced natural language processing model launched by 360, featuring exceptional text generation and understanding capabilities, particularly excelling in generation and creative tasks, capable of handling complex language transformations and role-playing tasks."
},
"360zhinao2-o1": {
"description": "360zhinao2-o1 uses tree search to build a chain of thought and introduces a reflection mechanism, utilizing reinforcement learning for training, enabling the model to possess self-reflection and error-correction capabilities."
},
"4.0Ultra": {
"description": "Spark4.0 Ultra is the most powerful version in the Spark large model series, enhancing text content understanding and summarization capabilities while upgrading online search links. It is a comprehensive solution for improving office productivity and accurately responding to demands, leading the industry as an intelligent product."
},
"AnimeSharp": {
"description": "AnimeSharp (also known as “4x-AnimeSharp”) is an open-source super-resolution model developed by Kim2091 based on the ESRGAN architecture, focusing on upscaling and sharpening anime-style images. It was renamed from “4x-TextSharpV1” in February 2022, originally also suitable for text images but significantly optimized for anime content."
},
"Baichuan2-Turbo": {
"description": "Utilizes search enhancement technology to achieve comprehensive links between large models and domain knowledge, as well as knowledge from the entire web. Supports uploads of various documents such as PDF and Word, and URL input, providing timely and comprehensive information retrieval with accurate and professional output."
},
"Baichuan3-Turbo": {
"description": "Optimized for high-frequency enterprise scenarios, significantly improving performance and cost-effectiveness. Compared to the Baichuan2 model, content creation improves by 20%, knowledge Q&A by 17%, and role-playing ability by 40%. Overall performance is superior to GPT-3.5."
},
"Baichuan3-Turbo-128k": {
"description": "Features a 128K ultra-long context window, optimized for high-frequency enterprise scenarios, significantly improving performance and cost-effectiveness. Compared to the Baichuan2 model, content creation improves by 20%, knowledge Q&A by 17%, and role-playing ability by 40%. Overall performance is superior to GPT-3.5."
},
"Baichuan4": {
"description": "The model is the best in the country, surpassing mainstream foreign models in Chinese tasks such as knowledge encyclopedias, long texts, and creative generation. It also boasts industry-leading multimodal capabilities, excelling in multiple authoritative evaluation benchmarks."
},
"Baichuan4-Air": {
"description": "The leading model in the country, surpassing mainstream foreign models in Chinese tasks such as knowledge encyclopedias, long texts, and creative generation. It also possesses industry-leading multimodal capabilities, excelling in multiple authoritative evaluation benchmarks."
},
"Baichuan4-Turbo": {
"description": "The leading model in the country, surpassing mainstream foreign models in Chinese tasks such as knowledge encyclopedias, long texts, and creative generation. It also possesses industry-leading multimodal capabilities, excelling in multiple authoritative evaluation benchmarks."
},
"ByteDance-Seed/Seed-OSS-36B-Instruct": {
"description": "Seed-OSS is a series of open-source large language models developed by ByteDance's Seed team, designed specifically for powerful long-context processing, reasoning, agents, and general capabilities. The Seed-OSS-36B-Instruct in this series is an instruction-tuned model with 36 billion parameters, natively supporting ultra-long context lengths, enabling it to handle massive documents or complex codebases in a single pass. This model is specially optimized for reasoning, code generation, and agent tasks (such as tool usage), while maintaining balanced and excellent general capabilities. A key feature of this model is the \"Thinking Budget\" function, which allows users to flexibly adjust the reasoning length as needed, effectively improving reasoning efficiency in practical applications."
},
"DeepSeek-R1": {
"description": "A state-of-the-art efficient LLM, skilled in reasoning, mathematics, and programming."
},
"DeepSeek-R1-Distill-Llama-70B": {
"description": "DeepSeek R1— the larger and smarter model in the DeepSeek suite— distilled into the Llama 70B architecture. Based on benchmark testing and human evaluation, this model is smarter than the original Llama 70B, particularly excelling in tasks requiring mathematical and factual accuracy."
},
"DeepSeek-R1-Distill-Qwen-1.5B": {
"description": "The DeepSeek-R1 distillation model based on Qwen2.5-Math-1.5B optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
},
"DeepSeek-R1-Distill-Qwen-14B": {
"description": "The DeepSeek-R1 distillation model based on Qwen2.5-14B optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
},
"DeepSeek-R1-Distill-Qwen-32B": {
"description": "The DeepSeek-R1 series optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks, surpassing the level of OpenAI-o1-mini."
},
"DeepSeek-R1-Distill-Qwen-7B": {
"description": "The DeepSeek-R1 distillation model based on Qwen2.5-Math-7B optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
},
"DeepSeek-V3": {
"description": "DeepSeek-V3 is a MoE model developed in-house by Deep Seek Company. Its performance surpasses that of other open-source models such as Qwen2.5-72B and Llama-3.1-405B in multiple assessments, and it stands on par with the world's top proprietary models like GPT-4o and Claude-3.5-Sonnet."
},
"DeepSeek-V3-1": {
"description": "DeepSeek V3.1: Next-generation reasoning model, enhancing complex reasoning and chain-of-thought capabilities, ideal for tasks requiring in-depth analysis."
},
"DeepSeek-V3-Fast": {
"description": "Model provider: sophnet platform. DeepSeek V3 Fast is the high-TPS ultra-fast version of DeepSeek V3 0324, fully powered without quantization, featuring enhanced coding and mathematical capabilities for faster response!"
},
"DeepSeek-V3.1": {
"description": "DeepSeek-V3.1 - Non-Thinking Mode; DeepSeek-V3.1 is a newly launched hybrid reasoning model by DeepSeek, supporting both thinking and non-thinking reasoning modes, with higher thinking efficiency compared to DeepSeek-R1-0528. Post-training optimization significantly enhances agent tool usage and agent task performance."
},
"DeepSeek-V3.1-Fast": {
"description": "DeepSeek V3.1 Fast is the high-TPS, ultra-fast version of DeepSeek V3.1. Hybrid Thinking Mode: By changing the chat template, a single model can support both thinking and non-thinking modes simultaneously. Smarter Tool Invocation: Post-training optimization significantly improves the model's performance in tool usage and agent tasks."
},
"DeepSeek-V3.1-Think": {
"description": "DeepSeek-V3.1 - Thinking Mode; DeepSeek-V3.1 is a newly launched hybrid reasoning model by DeepSeek, supporting both thinking and non-thinking reasoning modes, with higher thinking efficiency compared to DeepSeek-R1-0528. Post-training optimization significantly enhances agent tool usage and agent task performance."
},
"DeepSeek-V3.2-Exp": {
"description": "DeepSeek V3.2 is the latest general-purpose large model released by DeepSeek, supporting a hybrid inference architecture and featuring enhanced Agent capabilities."
},
"DeepSeek-V3.2-Exp-Think": {
"description": "DeepSeek V3.2 Thinking Mode. Before outputting the final answer, the model first generates a chain of thought to improve the accuracy of the final response."
},
"Doubao-lite-128k": {
"description": "Doubao-lite offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 128k context window."
},
"Doubao-lite-32k": {
"description": "Doubao-lite offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 32k context window."
},
"Doubao-lite-4k": {
"description": "Doubao-lite offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 4k context window."
},
"Doubao-pro-128k": {
"description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 128k context window."
},
"Doubao-pro-32k": {
"description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 32k context window."
},
"Doubao-pro-4k": {
"description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 4k context window."
},
"DreamO": {
"description": "DreamO is an open-source image customization generation model jointly developed by ByteDance and Peking University, designed to support multi-task image generation through a unified architecture. It employs an efficient compositional modeling approach to generate highly consistent and customized images based on multiple user-specified conditions such as identity, subject, style, and background."
},
"ERNIE-3.5-128K": {
"description": "Baidu's self-developed flagship large-scale language model, covering a vast amount of Chinese and English corpus. It possesses strong general capabilities, meeting the requirements for most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu's search plugin to ensure the timeliness of Q&A information."
},
"ERNIE-3.5-8K": {
"description": "Baidu's self-developed flagship large-scale language model, covering a vast amount of Chinese and English corpus. It possesses strong general capabilities, meeting the requirements for most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu's search plugin to ensure the timeliness of Q&A information."
},
"ERNIE-3.5-8K-Preview": {
"description": "Baidu's self-developed flagship large-scale language model, covering a vast amount of Chinese and English corpus. It possesses strong general capabilities, meeting the requirements for most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu's search plugin to ensure the timeliness of Q&A information."
},
"ERNIE-4.0-8K-Latest": {
"description": "Baidu's self-developed flagship ultra-large-scale language model, which has achieved a comprehensive upgrade in model capabilities compared to ERNIE 3.5, widely applicable to complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
},
"ERNIE-4.0-8K-Preview": {
"description": "Baidu's self-developed flagship ultra-large-scale language model, which has achieved a comprehensive upgrade in model capabilities compared to ERNIE 3.5, widely applicable to complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
},
"ERNIE-4.0-Turbo-8K-Latest": {
"description": "Baidu's self-developed flagship ultra-large-scale language model, demonstrating excellent overall performance, suitable for complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It offers better performance compared to ERNIE 4.0."
},
"ERNIE-4.0-Turbo-8K-Preview": {
"description": "Baidu's self-developed flagship ultra-large-scale language model, demonstrating excellent overall performance, widely applicable to complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It outperforms ERNIE 4.0 in performance."
},
"ERNIE-Character-8K": {
"description": "Baidu's self-developed vertical scene large language model, suitable for applications such as game NPCs, customer service dialogues, and role-playing conversations, featuring more distinct and consistent character styles, stronger adherence to instructions, and superior inference performance."
},
"ERNIE-Lite-Pro-128K": {
"description": "Baidu's self-developed lightweight large language model, balancing excellent model performance with inference efficiency, offering better results than ERNIE Lite, suitable for inference on low-power AI acceleration cards."
},
"ERNIE-Speed-128K": {
"description": "Baidu's latest self-developed high-performance large language model released in 2024, with outstanding general capabilities, suitable as a base model for fine-tuning, effectively addressing specific scenario issues while also exhibiting excellent inference performance."
},
"ERNIE-Speed-Pro-128K": {
"description": "Baidu's latest self-developed high-performance large language model released in 2024, with outstanding general capabilities, providing better results than ERNIE Speed, suitable as a base model for fine-tuning, effectively addressing specific scenario issues while also exhibiting excellent inference performance."
},
"FLUX-1.1-pro": {
"description": "FLUX.1.1 Pro"
},
"FLUX.1-Kontext-dev": {
"description": "FLUX.1-Kontext-dev is a multimodal image generation and editing model developed by Black Forest Labs based on the Rectified Flow Transformer architecture, featuring 12 billion parameters. It specializes in generating, reconstructing, enhancing, or editing images under given contextual conditions. The model combines the controllable generation advantages of diffusion models with the contextual modeling capabilities of Transformers, supporting high-quality image output and widely applicable to image restoration, completion, and visual scene reconstruction tasks."
},
"FLUX.1-Kontext-pro": {
"description": "FLUX.1 Kontext [pro]"
},
"FLUX.1-dev": {
"description": "FLUX.1-dev is an open-source multimodal language model (MLLM) developed by Black Forest Labs, optimized for vision-and-language tasks by integrating image and text understanding and generation capabilities. Built upon advanced large language models such as Mistral-7B, it achieves vision-language collaborative processing and complex task reasoning through a carefully designed visual encoder and multi-stage instruction fine-tuning."
},
"Gryphe/MythoMax-L2-13b": {
"description": "MythoMax-L2 (13B) is an innovative model suitable for multi-domain applications and complex tasks."
},
"HelloMeme": {
"description": "HelloMeme is an AI tool that automatically generates memes, GIFs, or short videos based on the images or actions you provide. It requires no drawing or programming skills; simply prepare reference images, and it will help you create visually appealing, fun, and stylistically consistent content."
},
"HiDream-I1-Full": {
"description": "HiDream-E1-Full is an open-source multimodal image editing large model launched by HiDream.ai, based on the advanced Diffusion Transformer architecture combined with powerful language understanding capabilities (embedded LLaMA 3.1-8B-Instruct). It supports image generation, style transfer, local editing, and content repainting through natural language instructions, demonstrating excellent vision-language comprehension and execution abilities."
},
"HunyuanDiT-v1.2-Diffusers-Distilled": {
"description": "hunyuandit-v1.2-distilled is a lightweight text-to-image model optimized through distillation, capable of rapidly generating high-quality images, especially suitable for low-resource environments and real-time generation tasks."
},
"InstantCharacter": {
"description": "InstantCharacter is a tuning-free personalized character generation model released by Tencent AI team in 2025, designed to achieve high-fidelity, cross-scene consistent character generation. The model supports character modeling based on a single reference image and can flexibly transfer the character to various styles, actions, and backgrounds."
},
"InternVL2-8B": {
"description": "InternVL2-8B is a powerful visual language model that supports multimodal processing of images and text, capable of accurately recognizing image content and generating relevant descriptions or answers."
},
"InternVL2.5-26B": {
"description": "InternVL2.5-26B is a powerful visual language model that supports multimodal processing of images and text, capable of accurately recognizing image content and generating relevant descriptions or answers."
},
"Kolors": {
"description": "Kolors is a text-to-image model developed by the Kuaishou Kolors team. Trained with billions of parameters, it excels in visual quality, Chinese semantic understanding, and text rendering."
},
"Kwai-Kolors/Kolors": {
"description": "Kolors is a large-scale latent diffusion text-to-image generation model developed by the Kuaishou Kolors team. Trained on billions of text-image pairs, it demonstrates significant advantages in visual quality, complex semantic accuracy, and Chinese and English character rendering. It supports both Chinese and English inputs and performs exceptionally well in understanding and generating Chinese-specific content."
},
"Llama-3.2-11B-Vision-Instruct": {
"description": "Exhibits outstanding image reasoning capabilities on high-resolution images, suitable for visual understanding applications."
},
"Llama-3.2-90B-Vision-Instruct\t": {
"description": "Advanced image reasoning capabilities suitable for visual understanding agent applications."
},
"Meta-Llama-3-3-70B-Instruct": {
"description": "Llama 3.3 70B: A versatile Transformer model suitable for conversational and generative tasks."
},
"Meta-Llama-3.1-405B-Instruct": {
"description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
},
"Meta-Llama-3.1-70B-Instruct": {
"description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
},
"Meta-Llama-3.1-8B-Instruct": {
"description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
},
"Meta-Llama-3.2-1B-Instruct": {
"description": "An advanced cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
},
"Meta-Llama-3.2-3B-Instruct": {
"description": "An advanced cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
},
"Meta-Llama-3.3-70B-Instruct": {
"description": "Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, offering performance comparable to a 405B model at a very low cost. Based on the Transformer architecture, it enhances usability and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on various industry benchmarks. Knowledge cutoff date is December 2023."
},
"Meta-Llama-4-Maverick-17B-128E-Instruct-FP8": {
"description": "Llama 4 Maverick: A large-scale model based on Mixture-of-Experts, offering an efficient expert activation strategy for superior inference performance."
},
"MiniMax-M1": {
"description": "A newly developed inference model. World-leading: 80K chain-of-thought x 1M input, delivering performance on par with top-tier international models."
},
"MiniMax-M2": {
"description": "Purpose-built for efficient coding and agent workflows."
},
"MiniMax-Text-01": {
"description": "In the MiniMax-01 series of models, we have made bold innovations: for the first time, we have implemented a linear attention mechanism on a large scale, making the traditional Transformer architecture no longer the only option. This model has a parameter count of up to 456 billion, with a single activation of 45.9 billion. Its overall performance rivals that of top overseas models while efficiently handling the world's longest context of 4 million tokens, which is 32 times that of GPT-4o and 20 times that of Claude-3.5-Sonnet."
},
"MiniMaxAI/MiniMax-M1-80k": {
"description": "MiniMax-M1 is a large-scale hybrid attention inference model with open-source weights, featuring 456 billion parameters, with approximately 45.9 billion parameters activated per token. The model natively supports ultra-long contexts of up to 1 million tokens and, through lightning attention mechanisms, reduces floating-point operations by 75% compared to DeepSeek R1 in tasks generating 100,000 tokens. Additionally, MiniMax-M1 employs a Mixture of Experts (MoE) architecture, combining the CISPO algorithm with an efficient reinforcement learning training design based on hybrid attention, achieving industry-leading performance in long-input inference and real-world software engineering scenarios."
},
"Moonshot-Kimi-K2-Instruct": {
"description": "With a total of 1 trillion parameters and 32 billion activated parameters, this non-thinking model achieves top-tier performance in cutting-edge knowledge, mathematics, and coding, excelling in general agent tasks. It is carefully optimized for agent tasks, capable not only of answering questions but also taking actions. Ideal for improvisational, general chat, and agent experiences, it is a reflex-level model requiring no prolonged thinking."
},
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": {
"description": "Nous Hermes 2 - Mixtral 8x7B-DPO (46.7B) is a high-precision instruction model suitable for complex computations."
},
"OmniConsistency": {
"description": "OmniConsistency enhances style consistency and generalization in image-to-image tasks by introducing large-scale Diffusion Transformers (DiTs) and paired stylized data, effectively preventing style degradation."
},
"Phi-3-medium-128k-instruct": {
"description": "The same Phi-3-medium model, but with a larger context size for RAG or few-shot prompting."
},
"Phi-3-medium-4k-instruct": {
"description": "A 14B parameter model that provides better quality than Phi-3-mini, focusing on high-quality, reasoning-dense data."
},
"Phi-3-mini-128k-instruct": {
"description": "The same Phi-3-mini model, but with a larger context size for RAG or few-shot prompting."
},
"Phi-3-mini-4k-instruct": {
"description": "The smallest member of the Phi-3 family, optimized for both quality and low latency."
},
"Phi-3-small-128k-instruct": {
"description": "The same Phi-3-small model, but with a larger context size for RAG or few-shot prompting."
},
"Phi-3-small-8k-instruct": {
"description": "A 7B parameter model that provides better quality than Phi-3-mini, focusing on high-quality, reasoning-dense data."
},
"Phi-3.5-mini-instruct": {
"description": "An updated version of the Phi-3-mini model."
},
"Phi-3.5-vision-instrust": {
"description": "An updated version of the Phi-3-vision model."
},
"Pro/Qwen/Qwen2-7B-Instruct": {
"description": "Qwen2-7B-Instruct is an instruction-tuned large language model in the Qwen2 series, with a parameter size of 7B. This model is based on the Transformer architecture and employs techniques such as the SwiGLU activation function, attention QKV bias, and group query attention. It can handle large-scale inputs. The model excels in language understanding, generation, multilingual capabilities, coding, mathematics, and reasoning across multiple benchmark tests, surpassing most open-source models and demonstrating competitive performance comparable to proprietary models in certain tasks. Qwen2-7B-Instruct outperforms Qwen1.5-7B-Chat in multiple evaluations, showing significant performance improvements."
},
"Pro/Qwen/Qwen2.5-7B-Instruct": {
"description": "Qwen2.5-7B-Instruct is one of the latest large language models released by Alibaba Cloud. This 7B model shows significant improvements in coding and mathematics. It also provides multilingual support, covering over 29 languages, including Chinese and English. The model has made notable advancements in instruction following, understanding structured data, and generating structured outputs, especially JSON."
},
"Pro/Qwen/Qwen2.5-Coder-7B-Instruct": {
"description": "Qwen2.5-Coder-7B-Instruct is the latest version in Alibaba Cloud's series of code-specific large language models. This model significantly enhances code generation, reasoning, and repair capabilities based on Qwen2.5, trained on 55 trillion tokens. It not only improves coding abilities but also maintains advantages in mathematics and general capabilities, providing a more comprehensive foundation for practical applications such as code agents."
},
"Pro/Qwen/Qwen2.5-VL-7B-Instruct": {
"description": "Qwen2.5-VL is the newest addition to the Qwen series, featuring enhanced visual comprehension capabilities. It can analyze text, charts, and layouts within images, comprehend long videos while capturing events. The model supports reasoning, tool manipulation, multi-format object localization, and structured output generation. It incorporates optimized dynamic resolution and frame rate training for video understanding, along with improved efficiency in its visual encoder."
},
"Pro/THUDM/GLM-4.1V-9B-Thinking": {
"description": "GLM-4.1V-9B-Thinking is an open-source vision-language model (VLM) jointly released by Zhipu AI and Tsinghua University's KEG Lab, designed specifically for handling complex multimodal cognitive tasks. Based on the GLM-4-9B-0414 foundation model, it significantly enhances cross-modal reasoning ability and stability by introducing the Chain-of-Thought reasoning mechanism and employing reinforcement learning strategies."
},
"Pro/THUDM/glm-4-9b-chat": {
"description": "GLM-4-9B-Chat is the open-source version of the GLM-4 series pre-trained models launched by Zhipu AI. This model excels in semantics, mathematics, reasoning, code, and knowledge. In addition to supporting multi-turn dialogues, GLM-4-9B-Chat also features advanced capabilities such as web browsing, code execution, custom tool invocation (Function Call), and long-text reasoning. The model supports 26 languages, including Chinese, English, Japanese, Korean, and German. In multiple benchmark tests, GLM-4-9B-Chat has demonstrated excellent performance, such as in AlignBench-v2, MT-Bench, MMLU, and C-Eval. The model supports a maximum context length of 128K, making it suitable for academic research and commercial applications."
},
"Pro/deepseek-ai/DeepSeek-R1": {
"description": "DeepSeek-R1 is a reinforcement learning (RL) driven inference model that addresses issues of repetitiveness and readability in models. Prior to RL, DeepSeek-R1 introduced cold start data to further optimize inference performance. It performs comparably to OpenAI-o1 in mathematical, coding, and reasoning tasks, and enhances overall effectiveness through carefully designed training methods."
},
"Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
"description": "DeepSeek-R1-Distill-Qwen-7B is a model derived from Qwen2.5-Math-7B through knowledge distillation. It was fine-tuned using 800,000 carefully selected samples generated by DeepSeek-R1, demonstrating exceptional reasoning capabilities. The model achieves outstanding performance across multiple benchmarks, including 92.8% accuracy on MATH-500, a 55.5% pass rate on AIME 2024, and a score of 1189 on CodeForces, showcasing strong mathematical and programming abilities for a 7B-scale model."
},
"Pro/deepseek-ai/DeepSeek-V3": {
"description": "DeepSeek-V3 is a mixed expert (MoE) language model with 671 billion parameters, utilizing multi-head latent attention (MLA) and the DeepSeekMoE architecture, combined with a load balancing strategy without auxiliary loss to optimize inference and training efficiency. Pre-trained on 14.8 trillion high-quality tokens and fine-tuned with supervision and reinforcement learning, DeepSeek-V3 outperforms other open-source models and approaches leading closed-source models."
},
"Pro/deepseek-ai/DeepSeek-V3.1-Terminus": {
"description": "DeepSeek-V3.1-Terminus is an updated version of the V3.1 model released by DeepSeek, positioned as a hybrid agent large language model. This update focuses on fixing user-reported issues and improving stability while maintaining the model's original capabilities. It significantly enhances language consistency, reducing the mixing of Chinese and English and the occurrence of abnormal characters. The model integrates both \"Thinking Mode\" and \"Non-thinking Mode,\" allowing users to switch flexibly between chat templates to suit different tasks. As a key optimization, V3.1-Terminus improves the performance of the Code Agent and Search Agent, making tool invocation and multi-step complex task execution more reliable."
},
"Pro/deepseek-ai/DeepSeek-V3.2-Exp": {
"description": "DeepSeek-V3.2-Exp is an experimental version released by DeepSeek as an intermediate step toward the next-generation architecture. Building on V3.1-Terminus, it introduces the DeepSeek Sparse Attention (DSA) mechanism to enhance training and inference efficiency for long-context scenarios. It features targeted optimizations for tool use, long-document comprehension, and multi-step reasoning. V3.2-Exp serves as a bridge between research and production, ideal for users seeking higher inference efficiency in high-context-budget applications."
},
"Pro/moonshotai/Kimi-K2-Instruct-0905": {
"description": "Kimi K2-Instruct-0905 is the latest and most powerful version of Kimi K2. It is a top-tier Mixture of Experts (MoE) language model with a total of 1 trillion parameters and 32 billion activated parameters. Key features of this model include enhanced agent coding intelligence, demonstrating significant performance improvements in public benchmark tests and real-world agent coding tasks; and an improved frontend coding experience, with advancements in both aesthetics and practicality for frontend programming."
},
"QwQ-32B-Preview": {
"description": "QwQ-32B-Preview is an innovative natural language processing model capable of efficiently handling complex dialogue generation and context understanding tasks."
},
"Qwen/QVQ-72B-Preview": {
"description": "QVQ-72B-Preview is a research-oriented model developed by the Qwen team, focusing on visual reasoning capabilities, with unique advantages in understanding complex scenes and solving visually related mathematical problems."
},
"Qwen/QwQ-32B": {
"description": "QwQ is the inference model of the Qwen series. Compared to traditional instruction-tuned models, QwQ possesses reasoning and cognitive abilities, achieving significantly enhanced performance in downstream tasks, especially in solving difficult problems. QwQ-32B is a medium-sized inference model that competes effectively against state-of-the-art inference models (such as DeepSeek-R1 and o1-mini). This model employs technologies such as RoPE, SwiGLU, RMSNorm, and Attention QKV bias, featuring a 64-layer network structure and 40 Q attention heads (with 8 KV heads in the GQA architecture)."
},
"Qwen/QwQ-32B-Preview": {
"description": "QwQ-32B-Preview is Qwen's latest experimental research model, focusing on enhancing AI reasoning capabilities. By exploring complex mechanisms such as language mixing and recursive reasoning, its main advantages include strong analytical reasoning, mathematical, and programming abilities. However, it also faces challenges such as language switching issues, reasoning loops, safety considerations, and differences in other capabilities."
},
"Qwen/Qwen-Image": {
"description": "Qwen-Image is a foundational image generation model developed by Alibaba's Tongyi Qianwen team, featuring 20 billion parameters. The model has made significant advancements in complex text rendering and precise image editing, excelling particularly at generating images with high-fidelity Chinese and English text. Qwen-Image can handle multi-line layouts and paragraph-level text while maintaining coherent typography and contextual harmony in generated images. Beyond its exceptional text rendering capabilities, the model supports a wide range of artistic styles—from photorealism to anime aesthetics—adapting flexibly to diverse creative needs. It also boasts powerful image editing and understanding capabilities, supporting advanced operations such as style transfer, object addition/removal, detail enhancement, text editing, and even human pose manipulation. Qwen-Image is designed to be a comprehensive foundational model for intelligent visual creation and processing, integrating language, layout, and imagery."
},
"Qwen/Qwen-Image-Edit-2509": {
"description": "Qwen-Image-Edit-2509 is the latest image editing version of Qwen-Image, released by Alibaba's Tongyi Qianwen team. Built upon the 20B-parameter Qwen-Image model, it has been further trained to extend its unique text rendering capabilities into the domain of image editing, enabling precise manipulation of text within images. Qwen-Image-Edit employs an innovative architecture that feeds the input image into both Qwen2.5-VL (for visual semantic control) and a VAE Encoder (for visual appearance control), enabling dual editing capabilities in both semantics and appearance. This allows for not only localized visual edits such as adding, removing, or modifying elements, but also high-level semantic edits like IP creation and style transfer that require semantic consistency. The model has demonstrated state-of-the-art (SOTA) performance across multiple public benchmarks, making it a powerful foundational model for image editing."
},
"Qwen/Qwen2-72B-Instruct": {
"description": "Qwen2 is an advanced general-purpose language model that supports various types of instructions."
},
"Qwen/Qwen2-7B-Instruct": {
"description": "Qwen2-72B-Instruct is an instruction-tuned large language model in the Qwen2 series, with a parameter size of 72B. This model is based on the Transformer architecture and employs techniques such as the SwiGLU activation function, attention QKV bias, and group query attention. It can handle large-scale inputs. The model excels in language understanding, generation, multilingual capabilities, coding, mathematics, and reasoning across multiple benchmark tests, surpassing most open-source models and demonstrating competitive performance comparable to proprietary models in certain tasks."
},
"Qwen/Qwen2-VL-72B-Instruct": {
"description": "Qwen2-VL is the latest iteration of the Qwen-VL model, achieving state-of-the-art performance in visual understanding benchmarks."
},
"Qwen/Qwen2.5-14B-Instruct": {
"description": "Qwen2.5 is a brand new series of large language models designed to optimize the handling of instruction-based tasks."
},
"Qwen/Qwen2.5-32B-Instruct": {
"description": "Qwen2.5 is a brand new series of large language models designed to optimize the handling of instruction-based tasks."
},
"Qwen/Qwen2.5-72B-Instruct": {
"description": "A large language model developed by the Alibaba Cloud Tongyi Qianwen team"
},
"Qwen/Qwen2.5-72B-Instruct-128K": {
"description": "Qwen2.5 is a new large language model series with enhanced understanding and generation capabilities."
},
"Qwen/Qwen2.5-72B-Instruct-Turbo": {
"description": "Qwen2.5 is a new large language model series designed to optimize instruction-based task processing."
},
"Qwen/Qwen2.5-7B-Instruct": {
"description": "Qwen2.5 is a brand new series of large language models designed to optimize the handling of instruction-based tasks."
},
"Qwen/Qwen2.5-7B-Instruct-Turbo": {
"description": "Qwen2.5 is a new large language model series designed to optimize instruction-based task processing."
},
"Qwen/Qwen2.5-Coder-32B-Instruct": {
"description": "Qwen2.5-Coder focuses on code writing."
},
"Qwen/Qwen2.5-Coder-7B-Instruct": {
"description": "Qwen2.5-Coder-7B-Instruct is the latest version in Alibaba Cloud's series of code-specific large language models. This model significantly enhances code generation, reasoning, and repair capabilities based on Qwen2.5, trained on 55 trillion tokens. It not only improves coding abilities but also maintains advantages in mathematics and general capabilities, providing a more comprehensive foundation for practical applications such as code agents."
},
"Qwen/Qwen2.5-VL-32B-Instruct": {
"description": "Qwen2.5-VL-32B-Instruct is a multimodal large language model developed by the Tongyi Qianwen team, representing part of the Qwen2.5-VL series. This model excels not only in recognizing common objects but also in analyzing text, charts, icons, graphics, and layouts within images. It functions as a visual agent capable of reasoning and dynamically manipulating tools, with the ability to operate computers and mobile devices. Additionally, the model can precisely locate objects in images and generate structured outputs for documents like invoices and tables. Compared to its predecessor Qwen2-VL, this version demonstrates enhanced mathematical and problem-solving capabilities through reinforcement learning, while also exhibiting more human-preferred response styles."
},
"Qwen/Qwen2.5-VL-72B-Instruct": {
"description": "Qwen2.5-VL is the vision-language model in the Qwen2.5 series. This model demonstrates significant improvements across multiple dimensions: enhanced visual comprehension capable of recognizing common objects, analyzing text, charts, and layouts; serving as a visual agent that can reason and dynamically guide tool usage; supporting understanding of long videos exceeding 1 hour while capturing key events; able to precisely locate objects in images by generating bounding boxes or points; and capable of producing structured outputs particularly suitable for scanned data like invoices and forms."
},
"Qwen/Qwen3-14B": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
},
"Qwen/Qwen3-235B-A22B": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
},
"Qwen/Qwen3-235B-A22B-Instruct-2507": {
"description": "Qwen3-235B-A22B-Instruct-2507 is a flagship mixture-of-experts (MoE) large language model developed by Alibaba Cloud Tongyi Qianwen team within the Qwen3 series. It has 235 billion total parameters with 22 billion activated per inference. Released as an update to the non-thinking mode Qwen3-235B-A22B, it focuses on significant improvements in instruction following, logical reasoning, text comprehension, mathematics, science, programming, and tool usage. Additionally, it enhances coverage of multilingual long-tail knowledge and better aligns with user preferences in subjective and open-ended tasks to generate more helpful and higher-quality text."
},
"Qwen/Qwen3-235B-A22B-Thinking-2507": {
"description": "Qwen3-235B-A22B-Thinking-2507 is a member of the Qwen3 large language model series developed by Alibaba Tongyi Qianwen team, specializing in complex reasoning tasks. Based on a mixture-of-experts (MoE) architecture with 235 billion total parameters and approximately 22 billion activated per token, it balances strong performance with computational efficiency. As a dedicated “thinking” model, it significantly improves performance in logic reasoning, mathematics, science, programming, and academic benchmarks requiring human expertise, ranking among the top open-source thinking models. It also enhances general capabilities such as instruction following, tool usage, and text generation, natively supports 256K long-context understanding, and is well-suited for scenarios requiring deep reasoning and long document processing."
},
"Qwen/Qwen3-30B-A3B": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
},
"Qwen/Qwen3-30B-A3B-Instruct-2507": {
"description": "Qwen3-30B-A3B-Instruct-2507 is an updated version of the Qwen3-30B-A3B non-thinking mode. It is a Mixture of Experts (MoE) model with a total of 30.5 billion parameters and 3.3 billion active parameters. The model features key enhancements across multiple areas, including significant improvements in instruction following, logical reasoning, text comprehension, mathematics, science, coding, and tool usage. Additionally, it has made substantial progress in covering long-tail multilingual knowledge and better aligns with user preferences in subjective and open-ended tasks, enabling it to generate more helpful responses and higher-quality text. Furthermore, its long-text comprehension capability has been extended to 256K tokens. This model supports only the non-thinking mode and does not generate `<think></think>` tags in its output."
},
"Qwen/Qwen3-30B-A3B-Thinking-2507": {
"description": "Qwen3-30B-A3B-Thinking-2507 is the latest “thinking” model in the Qwen3 series released by Alibaba’s Tongyi Qianwen team. As a mixture-of-experts (MoE) model with 30.5 billion total parameters and 3.3 billion active parameters, it is designed to improve capabilities for handling complex tasks. The model demonstrates significant performance gains on academic benchmarks requiring logical reasoning, mathematics, science, programming, and domain expertise. At the same time, its general abilities—such as instruction following, tool use, text generation, and alignment with human preferences—have been substantially enhanced. The model natively supports long-context understanding of 256K tokens and can scale up to 1 million tokens. This version is tailored for “thinking mode,” intended to solve highly complex problems through detailed step-by-step reasoning, and it also exhibits strong agent capabilities."
},
"Qwen/Qwen3-32B": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
},
"Qwen/Qwen3-8B": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
},
"Qwen/Qwen3-Coder-30B-A3B-Instruct": {
"description": "Qwen3-Coder-30B-A3B-Instruct is a code model in the Qwen3 series developed by Alibaba's Tongyi Qianwen team. As a streamlined and optimized model, it focuses on enhancing code-handling capabilities while maintaining high performance and efficiency. The model demonstrates notable advantages among open-source models on complex tasks such as agentic coding, automated browser operations, and tool invocation. It natively supports a long context of 256K tokens and can be extended up to 1M tokens, enabling better understanding and processing at the codebase level. Additionally, the model provides robust agentic coding support for platforms like Qwen Code and CLINE, and it employs a dedicated function-calling format."
},
"Qwen/Qwen3-Coder-480B-A35B-Instruct": {
"description": "Qwen3-Coder-480B-A35B-Instruct, released by Alibaba, is the most agentic code model to date. It is a mixture-of-experts (MoE) model with 480 billion total parameters and 35 billion active parameters, striking a balance between efficiency and performance. The model natively supports a 256K (~260k) token context window and can be extended to 1,000,000 tokens through extrapolation methods such as YaRN, enabling it to handle large codebases and complex programming tasks. Qwen3-Coder is designed for agent-style coding workflows: it not only generates code but can autonomously interact with development tools and environments to solve complex programming problems. On multiple benchmarks for coding and agent tasks, this model achieves top-tier results among open-source models, with performance comparable to leading models like Claude Sonnet 4."
},
"Qwen/Qwen3-Next-80B-A3B-Instruct": {
"description": "Qwen3-Next-80B-A3B-Instruct is the next-generation foundational model released by Alibaba's Tongyi Qianwen team. It is based on the brand-new Qwen3-Next architecture, designed to achieve ultimate training and inference efficiency. The model employs an innovative hybrid attention mechanism (Gated DeltaNet and Gated Attention), a highly sparse mixture-of-experts (MoE) structure, and multiple training stability optimizations. As a sparse model with a total of 80 billion parameters, it activates only about 3 billion parameters during inference, significantly reducing computational costs. When handling long-context tasks exceeding 32K tokens, its inference throughput is more than 10 times higher than the Qwen3-32B model. This model is an instruction-tuned version designed for general tasks and does not support the Thinking mode. In terms of performance, it is comparable to Tongyi Qianwen's flagship Qwen3-235B model on some benchmarks, especially demonstrating clear advantages in ultra-long context tasks."
},
"Qwen/Qwen3-Next-80B-A3B-Thinking": {
"description": "Qwen3-Next-80B-A3B-Thinking is the next-generation foundational model released by Alibaba's Tongyi Qianwen team, specifically designed for complex reasoning tasks. It is based on the innovative Qwen3-Next architecture, which integrates a hybrid attention mechanism (Gated DeltaNet and Gated Attention) and a highly sparse mixture-of-experts (MoE) structure, aiming for ultimate training and inference efficiency. As a sparse model with a total of 80 billion parameters, it activates only about 3 billion parameters during inference, greatly reducing computational costs. When processing long-context tasks exceeding 32K tokens, its throughput is more than 10 times higher than the Qwen3-32B model. This \"Thinking\" version is optimized for executing challenging multi-step tasks such as mathematical proofs, code synthesis, logical analysis, and planning, and by default outputs the reasoning process in a structured \"chain-of-thought\" format. In terms of performance, it not only surpasses higher-cost models like Qwen3-32B-Thinking but also outperforms Gemini-2.5-Flash-Thinking on multiple benchmarks."
},
"Qwen/Qwen3-Omni-30B-A3B-Captioner": {
"description": "Qwen3-Omni-30B-A3B-Captioner is a vision-language model (VLM) from Alibaba's Qwen3 series, developed by the Tongyi Qianwen team. It is specifically designed to generate high-quality, detailed, and accurate image captions. Built on a 30-billion-parameter Mixture of Experts (MoE) architecture, the model excels at understanding image content and converting it into natural, fluent textual descriptions. It demonstrates outstanding performance in capturing image details, scene understanding, object recognition, and relational reasoning, making it ideal for applications requiring precise image comprehension and caption generation."
},
"Qwen/Qwen3-Omni-30B-A3B-Instruct": {
"description": "Qwen3-Omni-30B-A3B-Instruct is part of the latest Qwen3 series from Alibaba's Tongyi Qianwen team. This Mixture of Experts (MoE) model features 30 billion total parameters and 3 billion active parameters, offering powerful performance while reducing inference costs. Trained on high-quality, diverse, and multilingual data, it boasts strong general capabilities and supports full-modality input processing—including text, images, audio, and video—enabling it to understand and generate cross-modal content."
},
"Qwen/Qwen3-Omni-30B-A3B-Thinking": {
"description": "Qwen3-Omni-30B-A3B-Thinking is the core 'Thinker' component of the Qwen3-Omni multimodal model. It is designed to handle complex chain-of-thought reasoning across multiple modalities, including text, audio, images, and video. Acting as the reasoning engine, it unifies all inputs into a shared representation space, enabling deep cross-modal understanding and sophisticated reasoning. Built on a Mixture of Experts (MoE) architecture with 30 billion total parameters and 3 billion active parameters, it balances powerful reasoning capabilities with computational efficiency."
},
"Qwen/Qwen3-VL-235B-A22B-Instruct": {
"description": "Qwen3-VL-235B-A22B-Instruct is a large instruction-tuned model in the Qwen3-VL series. Based on a Mixture of Experts (MoE) architecture, it offers exceptional multimodal understanding and generation capabilities. With native support for 256K context length, it is well-suited for high-concurrency, production-grade multimodal services."
},
"Qwen/Qwen3-VL-235B-A22B-Thinking": {
"description": "Qwen3-VL-235B-A22B-Thinking is the flagship reasoning model in the Qwen3-VL series. It is specially optimized for complex multimodal reasoning, long-context inference, and agent interaction, making it ideal for enterprise-level scenarios that demand deep reasoning and visual understanding."
},
"Qwen/Qwen3-VL-30B-A3B-Instruct": {
"description": "Qwen3-VL-30B-A3B-Instruct is an instruction-tuned model in the Qwen3-VL series, featuring powerful vision-language understanding and generation capabilities. With native support for 256K context length, it is suitable for multimodal dialogue and image-conditioned generation tasks."
},
"Qwen/Qwen3-VL-30B-A3B-Thinking": {
"description": "Qwen3-VL-30B-A3B-Thinking is the reasoning-enhanced version of Qwen3-VL. It is optimized for multimodal reasoning, image-to-code tasks, and complex visual understanding. Supporting 256K context length, it offers stronger chain-of-thought capabilities."
},
"Qwen/Qwen3-VL-32B-Instruct": {
"description": "Qwen3-VL-32B-Instruct is a vision-language model developed by Alibaba's Tongyi Qianwen team, achieving state-of-the-art (SOTA) performance across multiple vision-language benchmarks. It supports high-resolution image inputs at the megapixel level and offers robust general visual understanding, multilingual OCR, fine-grained visual localization, and visual dialogue capabilities. As part of the Qwen3 series, it is equipped to handle complex multimodal tasks and supports advanced features such as tool invocation and prefix continuation."
},
"Qwen/Qwen3-VL-32B-Thinking": {
"description": "Qwen3-VL-32B-Thinking is a specialized version of Alibaba's Qwen3 vision-language model, optimized for complex visual reasoning tasks. It features a built-in 'thinking mode' that enables the model to generate detailed intermediate reasoning steps before answering, significantly enhancing its performance on tasks requiring multi-step logic, planning, and complex inference. The model supports high-resolution image inputs at the megapixel level and offers strong general visual understanding, multilingual OCR, fine-grained visual localization, and visual dialogue capabilities, along with support for tool invocation and prefix continuation."
},
"Qwen/Qwen3-VL-8B-Instruct": {
"description": "Qwen3-VL-8B-Instruct is a vision-language model from the Qwen3 series, built on Qwen3-8B-Instruct and trained on a large corpus of image-text data. It excels at general visual understanding, vision-centric dialogue, and multilingual text recognition within images. It is well-suited for tasks such as visual question answering, image captioning, multimodal instruction following, and tool invocation."
},
"Qwen/Qwen3-VL-8B-Thinking": {
"description": "Qwen3-VL-8B-Thinking is the visual reasoning variant of the Qwen3 series, optimized for complex multi-step reasoning tasks. By default, it generates a step-by-step thinking chain before answering, enhancing reasoning accuracy. It is ideal for scenarios requiring in-depth reasoning, such as visual question answering and detailed analysis of image content."
},
"Qwen2-72B-Instruct": {
"description": "Qwen2 is the latest series of the Qwen model, supporting 128k context. Compared to the current best open-source models, Qwen2-72B significantly surpasses leading models in natural language understanding, knowledge, coding, mathematics, and multilingual capabilities."
},
"Qwen2-7B-Instruct": {
"description": "Qwen2 is the latest series of the Qwen model, capable of outperforming optimal open-source models of similar size and even larger models. Qwen2 7B has achieved significant advantages in multiple evaluations, especially in coding and Chinese comprehension."
},
"Qwen2-VL-72B": {
"description": "Qwen2-VL-72B is a powerful visual language model that supports multimodal processing of images and text, capable of accurately recognizing image content and generating relevant descriptions or answers."
},
"Qwen2.5-14B-Instruct": {
"description": "Qwen2.5-14B-Instruct is a large language model with 14 billion parameters, delivering excellent performance, optimized for Chinese and multilingual scenarios, and supporting applications such as intelligent Q&A and content generation."
},
"Qwen2.5-32B-Instruct": {
"description": "Qwen2.5-32B-Instruct is a large language model with 32 billion parameters, offering balanced performance, optimized for Chinese and multilingual scenarios, and supporting applications such as intelligent Q&A and content generation."
},
"Qwen2.5-72B-Instruct": {
"description": "Qwen2.5-72B-Instruct supports 16k context and generates long texts exceeding 8K. It enables seamless interaction with external systems through function calls, greatly enhancing flexibility and scalability. The model's knowledge has significantly increased, and its coding and mathematical abilities have been greatly improved, with multilingual support for over 29 languages."
},
"Qwen2.5-7B-Instruct": {
"description": "Qwen2.5-7B-Instruct is a large language model with 7 billion parameters, supporting function calls and seamless interaction with external systems, greatly enhancing flexibility and scalability. It is optimized for Chinese and multilingual scenarios, supporting applications such as intelligent Q&A and content generation."
},
"Qwen2.5-Coder-14B-Instruct": {
"description": "Qwen2.5-Coder-14B-Instruct is a large-scale pre-trained programming instruction model with strong code understanding and generation capabilities, efficiently handling various programming tasks, particularly suited for intelligent code writing, automated script generation, and programming problem-solving."
},
"Qwen2.5-Coder-32B-Instruct": {
"description": "Qwen2.5-Coder-32B-Instruct is a large language model specifically designed for code generation, code understanding, and efficient development scenarios, featuring an industry-leading 32 billion parameters to meet diverse programming needs."
},
"Qwen3-235B": {
"description": "Qwen3-235B-A22B is a Mixture of Experts (MoE) model that introduces a \"Hybrid Reasoning Mode,\" allowing users to seamlessly switch between \"Thinking Mode\" and \"Non-Thinking Mode.\" It supports understanding and reasoning in 119 languages and dialects and possesses powerful tool invocation capabilities. In comprehensive benchmarks covering overall ability, coding and mathematics, multilingual proficiency, knowledge, and reasoning, it competes with leading large models on the market such as DeepSeek R1, OpenAI o1, o3-mini, Grok 3, and Google Gemini 2.5 Pro."
},
"Qwen3-235B-A22B-Instruct-2507-FP8": {
"description": "Qwen3 235B A22B Instruct 2507: A model optimized for advanced reasoning and dialogue instructions, featuring a mixture-of-experts architecture to maintain inference efficiency at large scale."
},
"Qwen3-32B": {
"description": "Qwen3-32B is a dense model that introduces a \"Hybrid Reasoning Mode,\" enabling users to seamlessly switch between \"Thinking Mode\" and \"Non-Thinking Mode.\" Thanks to architectural improvements, increased training data, and more efficient training methods, its overall performance is comparable to that of Qwen2.5-72B."
},
"SenseChat": {
"description": "Basic version model (V4) with a context length of 4K, featuring strong general capabilities."
},
"SenseChat-128K": {
"description": "Basic version model (V4) with a context length of 128K, excelling in long text comprehension and generation tasks."
},
"SenseChat-32K": {
"description": "Basic version model (V4) with a context length of 32K, flexibly applicable to various scenarios."
},
"SenseChat-5": {
"description": "The latest version model (V5.5) with a context length of 128K shows significant improvements in mathematical reasoning, English conversation, instruction following, and long text comprehension, comparable to GPT-4o."
},
"SenseChat-5-1202": {
"description": "Based on version V5.5, this latest release shows significant improvements over the previous version in foundational Chinese and English capabilities, chat, science knowledge, humanities knowledge, writing, mathematical logic, and word count control."
},
"SenseChat-5-Cantonese": {
"description": "With a context length of 32K, it surpasses GPT-4 in Cantonese conversation comprehension and is competitive with GPT-4 Turbo in knowledge, reasoning, mathematics, and code writing across multiple domains."
},
"SenseChat-5-beta": {
"description": "Partially outperforms SenseCat-5-1202"
},
"SenseChat-Character": {
"description": "Standard version model with an 8K context length and high response speed."
},
"SenseChat-Character-Pro": {
"description": "Advanced version model with a context length of 32K, offering comprehensive capability enhancements and supporting both Chinese and English conversations."
},
"SenseChat-Turbo": {
"description": "Suitable for fast question answering and model fine-tuning scenarios."
},
"SenseChat-Turbo-1202": {
"description": "This is the latest lightweight version model, achieving over 90% of the full model's capabilities while significantly reducing inference costs."
},
"SenseChat-Vision": {
"description": "The latest version model (V5.5) supports multi-image input and fully optimizes the model's basic capabilities, achieving significant improvements in object attribute recognition, spatial relationships, action event recognition, scene understanding, emotion recognition, logical reasoning, and text understanding and generation."
},
"SenseNova-V6-5-Pro": {
"description": "With comprehensive updates to multimodal, language, and reasoning data, along with optimized training strategies, the new model achieves significant improvements in multimodal reasoning and generalized instruction-following capabilities. It supports a context window of up to 128K tokens and excels in specialized tasks such as OCR and cultural tourism IP recognition."
},
"SenseNova-V6-5-Turbo": {
"description": "With comprehensive updates to multimodal, language, and reasoning data, along with optimized training strategies, the new model achieves significant improvements in multimodal reasoning and generalized instruction-following capabilities. It supports a context window of up to 128K tokens and excels in specialized tasks such as OCR and cultural tourism IP recognition."
},
"SenseNova-V6-Pro": {
"description": "Achieves a native unification of image, text, and video capabilities, breaking through the limitations of traditional discrete multimodality, winning dual championships in the OpenCompass and SuperCLUE evaluations."
},
"SenseNova-V6-Reasoner": {
"description": "Balances visual and linguistic deep reasoning, enabling slow thinking and profound inference, presenting a complete chain of thought process."
},
"SenseNova-V6-Turbo": {
"description": "Achieves a native unification of image, text, and video capabilities, breaking through the limitations of traditional discrete multimodality, leading comprehensively in core dimensions such as multimodal foundational abilities and linguistic foundational abilities, excelling in both literature and science, and consistently ranking among the top tier in various assessments both domestically and internationally."
},
"Skylark2-lite-8k": {
"description": "Skylark 2nd generation model, Skylark2-lite model is characterized by high response speed, suitable for high real-time requirements, cost-sensitive scenarios, and situations where model accuracy is less critical, with a context window length of 8k."
},
"Skylark2-pro-32k": {
"description": "Skylark 2nd generation model, Skylark2-pro version has high model accuracy, suitable for more complex text generation scenarios such as professional field copy generation, novel writing, and high-quality translation, with a context window length of 32k."
},
"Skylark2-pro-4k": {
"description": "Skylark 2nd generation model, Skylark2-pro model has high model accuracy, suitable for more complex text generation scenarios such as professional field copy generation, novel writing, and high-quality translation, with a context window length of 4k."
},
"Skylark2-pro-character-4k": {
"description": "Skylark 2nd generation model, Skylark2-pro-character has excellent role-playing and chat capabilities, adept at engaging in conversations with users based on their prompt requests, showcasing distinct character styles and flowing dialogue, making it well-suited for building chatbots, virtual assistants, and online customer service, with high response speed."
},
"Skylark2-pro-turbo-8k": {
"description": "Skylark 2nd generation model, Skylark2-pro-turbo-8k provides faster inference at a lower cost, with a context window length of 8k."
},
"THUDM/GLM-4-32B-0414": {
"description": "GLM-4-32B-0414 is the next-generation open-source model in the GLM series, boasting 32 billion parameters. Its performance is comparable to OpenAI's GPT series and DeepSeek's V3/R1 series."
},
"THUDM/GLM-4-9B-0414": {
"description": "GLM-4-9B-0414 is a small model in the GLM series, with 9 billion parameters. This model inherits the technical characteristics of the GLM-4-32B series while providing a more lightweight deployment option. Despite its smaller size, GLM-4-9B-0414 still demonstrates excellent capabilities in tasks such as code generation, web design, SVG graphics generation, and search-based writing."
},
"THUDM/GLM-4.1V-9B-Thinking": {
"description": "GLM-4.1V-9B-Thinking is an open-source vision-language model (VLM) jointly released by Zhipu AI and Tsinghua University's KEG Lab, designed specifically for handling complex multimodal cognitive tasks. Based on the GLM-4-9B-0414 foundation model, it significantly enhances cross-modal reasoning ability and stability by introducing the Chain-of-Thought reasoning mechanism and employing reinforcement learning strategies."
},
"THUDM/GLM-Z1-32B-0414": {
"description": "GLM-Z1-32B-0414 is a reasoning model with deep thinking capabilities. This model is developed based on GLM-4-32B-0414 through cold start and extended reinforcement learning, with further training in mathematics, coding, and logic tasks. Compared to the base model, GLM-Z1-32B-0414 significantly enhances mathematical abilities and the capacity to solve complex tasks."
},
"THUDM/GLM-Z1-9B-0414": {
"description": "GLM-Z1-9B-0414 is a small model in the GLM series, with only 9 billion parameters, yet it demonstrates remarkable capabilities while maintaining the open-source tradition. Despite its smaller size, this model excels in mathematical reasoning and general tasks, leading the performance among similarly sized open-source models."
},
"THUDM/GLM-Z1-Rumination-32B-0414": {
"description": "GLM-Z1-Rumination-32B-0414 is a deep reasoning model with reflective capabilities (comparable to OpenAI's Deep Research). Unlike typical deep thinking models, reflective models engage in longer periods of deep thought to tackle more open and complex problems."
},
"THUDM/glm-4-9b-chat": {
"description": "GLM-4 9B is an open-source version that provides an optimized conversational experience for chat applications."
},
"Tongyi-Zhiwen/QwenLong-L1-32B": {
"description": "QwenLong-L1-32B is the first large reasoning model (LRM) trained with reinforcement learning for long-context tasks, optimized specifically for long-text reasoning. It achieves stable transfer from short to long contexts through a progressive context expansion reinforcement learning framework. In seven long-context document QA benchmarks, QwenLong-L1-32B outperforms flagship models like OpenAI-o3-mini and Qwen3-235B-A22B, with performance comparable to Claude-3.7-Sonnet-Thinking. The model excels in complex tasks such as mathematical reasoning, logical reasoning, and multi-hop reasoning."
},
"Yi-34B-Chat": {
"description": "Yi-1.5-34B significantly enhances mathematical logic and coding abilities by incrementally training on 500 billion high-quality tokens while maintaining the excellent general language capabilities of the original series."
},
"abab5.5-chat": {
"description": "Targeted at productivity scenarios, supporting complex task processing and efficient text generation, suitable for professional applications."
},
"abab5.5s-chat": {
"description": "Designed for Chinese persona dialogue scenarios, providing high-quality Chinese dialogue generation capabilities, suitable for various application contexts."
},
"abab6.5g-chat": {
"description": "Designed for multilingual persona dialogue, supporting high-quality dialogue generation in English and other languages."
},
"abab6.5s-chat": {
"description": "Suitable for a wide range of natural language processing tasks, including text generation and dialogue systems."
},
"abab6.5t-chat": {
"description": "Optimized for Chinese persona dialogue scenarios, providing smooth dialogue generation that aligns with Chinese expression habits."
},
"accounts/fireworks/models/deepseek-r1": {
"description": "DeepSeek-R1 is a state-of-the-art large language model optimized through reinforcement learning and cold-start data, excelling in reasoning, mathematics, and programming performance."
},
"accounts/fireworks/models/deepseek-v3": {
"description": "A powerful Mixture-of-Experts (MoE) language model provided by Deepseek, with a total parameter count of 671B, activating 37B parameters per token."
},
"accounts/fireworks/models/llama-v3-70b-instruct": {
"description": "Llama 3 70B instruction model, optimized for multilingual dialogues and natural language understanding, outperforming most competitive models."
},
"accounts/fireworks/models/llama-v3-8b-instruct": {
"description": "Llama 3 8B instruction model, optimized for dialogues and multilingual tasks, delivering outstanding and efficient performance."
},
"accounts/fireworks/models/llama-v3-8b-instruct-hf": {
"description": "Llama 3 8B instruction model (HF version), consistent with official implementation results, featuring high consistency and cross-platform compatibility."
},
"accounts/fireworks/models/llama-v3p1-405b-instruct": {
"description": "Llama 3.1 405B instruction model, equipped with massive parameters, suitable for complex tasks and instruction following in high-load scenarios."
},
"accounts/fireworks/models/llama-v3p1-70b-instruct": {
"description": "Llama 3.1 70B instruction model provides exceptional natural language understanding and generation capabilities, making it an ideal choice for dialogue and analysis tasks."
},
"accounts/fireworks/models/llama-v3p1-8b-instruct": {
"description": "Llama 3.1 8B instruction model, optimized for multilingual dialogues, capable of surpassing most open-source and closed-source models on common industry benchmarks."
},
"accounts/fireworks/models/llama-v3p2-11b-vision-instruct": {
"description": "Meta's 11B parameter instruction-tuned image reasoning model. This model is optimized for visual recognition, image reasoning, image description, and answering general questions about images. It understands visual data like charts and graphs, generating text descriptions of image details to bridge the gap between vision and language."
},
"accounts/fireworks/models/llama-v3p2-3b-instruct": {
"description": "The Llama 3.2 3B instruction model is a lightweight multilingual model introduced by Meta. This model aims to enhance efficiency, providing significant improvements in latency and cost compared to larger models. Sample use cases include querying, prompt rewriting, and writing assistance."
},
"accounts/fireworks/models/llama-v3p2-90b-vision-instruct": {
"description": "Meta's 90B parameter instruction-tuned image reasoning model. This model is optimized for visual recognition, image reasoning, image description, and answering general questions about images. It understands visual data like charts and graphs, generating text descriptions of image details to bridge the gap between vision and language."
},
"accounts/fireworks/models/llama-v3p3-70b-instruct": {
"description": "Llama 3.3 70B Instruct is the December update of Llama 3.1 70B. This model builds upon Llama 3.1 70B (released in July 2024) with enhancements in tool invocation, multilingual text support, mathematics, and programming capabilities. It achieves industry-leading performance in reasoning, mathematics, and instruction following, providing similar performance to 3.1 405B while offering significant advantages in speed and cost."
},
"accounts/fireworks/models/mistral-small-24b-instruct-2501": {
"description": "A 24B parameter model that possesses state-of-the-art capabilities comparable to larger models."
},
"accounts/fireworks/models/mixtral-8x22b-instruct": {
"description": "Mixtral MoE 8x22B instruction model, featuring large-scale parameters and a multi-expert architecture, fully supporting efficient processing of complex tasks."
},
"accounts/fireworks/models/mixtral-8x7b-instruct": {
"description": "Mixtral MoE 8x7B instruction model, with a multi-expert architecture providing efficient instruction following and execution."
},
"accounts/fireworks/models/mythomax-l2-13b": {
"description": "MythoMax L2 13B model, combining novel merging techniques, excels in narrative and role-playing."
},
"accounts/fireworks/models/phi-3-vision-128k-instruct": {
"description": "Phi 3 Vision instruction model, a lightweight multimodal model capable of handling complex visual and textual information, with strong reasoning abilities."
},
"accounts/fireworks/models/qwen-qwq-32b-preview": {
"description": "The QwQ model is an experimental research model developed by the Qwen team, focusing on enhancing AI reasoning capabilities."
},
"accounts/fireworks/models/qwen2-vl-72b-instruct": {
"description": "The 72B version of the Qwen-VL model is the latest iteration from Alibaba, representing nearly a year of innovation."
},
"accounts/fireworks/models/qwen2p5-72b-instruct": {
"description": "Qwen2.5 is a series of decoder-only language models developed by the Alibaba Cloud Qwen team. These models come in different sizes including 0.5B, 1.5B, 3B, 7B, 14B, 32B, and 72B, available in both base and instruct variants."
},
"accounts/fireworks/models/qwen2p5-coder-32b-instruct": {
"description": "Qwen2.5 Coder 32B Instruct is the latest version in Alibaba Cloud's series of code-specific large language models. This model significantly enhances code generation, reasoning, and repair capabilities based on Qwen2.5, trained on 55 trillion tokens. It not only improves coding abilities but also maintains advantages in mathematics and general capabilities, providing a more comprehensive foundation for practical applications such as code agents."
},
"accounts/yi-01-ai/models/yi-large": {
"description": "Yi-Large model, featuring exceptional multilingual processing capabilities, suitable for various language generation and understanding tasks."
},
"ai21-jamba-1.5-large": {
"description": "A 398B parameter (94B active) multilingual model, offering a 256K long context window, function calling, structured output, and grounded generation."
},
"ai21-jamba-1.5-mini": {
"description": "A 52B parameter (12B active) multilingual model, offering a 256K long context window, function calling, structured output, and grounded generation."
},
"ai21-labs/AI21-Jamba-1.5-Large": {
"description": "A 398B parameter (94B active) multilingual model providing a 256K long context window, function calling, structured output, and fact-based generation."
},
"ai21-labs/AI21-Jamba-1.5-Mini": {
"description": "A 52B parameter (12B active) multilingual model offering a 256K long context window, function calling, structured output, and fact-based generation."
},
"alibaba/qwen-3-14b": {
"description": "Qwen3 is the latest generation large language model in the Qwen series, offering a comprehensive set of dense and Mixture of Experts (MoE) models. Built on extensive training, Qwen3 delivers breakthrough advancements in reasoning, instruction following, agent capabilities, and multilingual support."
},
"alibaba/qwen-3-235b": {
"description": "Qwen3 is the latest generation large language model in the Qwen series, offering a comprehensive set of dense and Mixture of Experts (MoE) models. Built on extensive training, Qwen3 delivers breakthrough advancements in reasoning, instruction following, agent capabilities, and multilingual support."
|
||
},
|
||
"alibaba/qwen-3-30b": {
|
||
"description": "Qwen3 is the latest generation large language model in the Qwen series, offering a comprehensive set of dense and Mixture of Experts (MoE) models. Built on extensive training, Qwen3 delivers breakthrough advancements in reasoning, instruction following, agent capabilities, and multilingual support."
},
"alibaba/qwen-3-32b": {
"description": "Qwen3 is the latest generation large language model in the Qwen series, offering a comprehensive set of dense and Mixture of Experts (MoE) models. Built on extensive training, Qwen3 delivers breakthrough advancements in reasoning, instruction following, agent capabilities, and multilingual support."
},
"alibaba/qwen3-coder": {
"description": "Qwen3-Coder-480B-A35B-Instruct is Qwen's most agent-capable code model, demonstrating remarkable performance in agent coding, agent browser usage, and other fundamental coding tasks, achieving results comparable to Claude Sonnet."
},
"amazon/nova-lite": {
"description": "A very low-cost multimodal model that processes image, video, and text inputs at extremely high speed."
},
"amazon/nova-micro": {
"description": "A text-only model delivering the lowest latency responses at a very low cost."
},
"amazon/nova-pro": {
"description": "A highly capable multimodal model offering the best combination of accuracy, speed, and cost, suitable for a wide range of tasks."
},
"amazon/titan-embed-text-v2": {
"description": "Amazon Titan Text Embeddings V2 is a lightweight, efficient multilingual embedding model supporting 1024, 512, and 256 dimensions."
},
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
"description": "Claude 3.5 Sonnet raises the industry standard, outperforming competitor models and Claude 3 Opus, excelling in a wide range of evaluations while maintaining the speed and cost of our mid-tier models."
},
"anthropic.claude-3-5-sonnet-20241022-v2:0": {
"description": "Claude 3.5 Sonnet raises the industry standard, outperforming competing models and Claude 3 Opus, excelling in extensive evaluations while maintaining the speed and cost of our mid-tier models."
},
"anthropic.claude-3-haiku-20240307-v1:0": {
"description": "Claude 3 Haiku is Anthropic's fastest and most compact model, providing near-instantaneous response times. It can quickly answer simple queries and requests. Customers will be able to build seamless AI experiences that mimic human interaction. Claude 3 Haiku can process images and return text output, with a context window of 200K."
},
"anthropic.claude-3-opus-20240229-v1:0": {
"description": "Claude 3 Opus is Anthropic's most powerful AI model, featuring state-of-the-art performance on highly complex tasks. It can handle open-ended prompts and unseen scenarios, demonstrating exceptional fluency and human-like understanding. Claude 3 Opus showcases the forefront of generative AI possibilities. Claude 3 Opus can process images and return text output, with a context window of 200K."
},
"anthropic.claude-3-sonnet-20240229-v1:0": {
"description": "Anthropic's Claude 3 Sonnet strikes an ideal balance between intelligence and speed—especially suited for enterprise workloads. It offers maximum utility at a price lower than competitors and is designed to be a reliable, durable workhorse for scalable AI deployments. Claude 3 Sonnet can process images and return text output, with a context window of 200K."
},
"anthropic.claude-instant-v1": {
"description": "A fast, economical, yet still highly capable model that can handle a range of tasks, including everyday conversations, text analysis, summarization, and document Q&A."
},
"anthropic.claude-v2": {
"description": "Anthropic's model demonstrates high capability across a wide range of tasks, from complex conversations and creative content generation to detailed instruction following."
},
"anthropic.claude-v2:1": {
"description": "An updated version of Claude 2, featuring double the context window and improvements in reliability, hallucination rates, and evidence-based accuracy in long documents and RAG contexts."
},
"anthropic/claude-3-haiku": {
"description": "Claude 3 Haiku is Anthropic's fastest model to date, designed for enterprise workloads that typically involve longer prompts. Haiku can quickly analyze large volumes of documents such as quarterly filings, contracts, or legal cases, at half the cost of other models in its performance tier."
},
"anthropic/claude-3-opus": {
"description": "Claude 3 Opus is Anthropic's smartest model, delivering market-leading performance on highly complex tasks. It navigates open-ended prompts and novel scenarios with exceptional fluency and human-like understanding."
},
"anthropic/claude-3.5-haiku": {
"description": "Claude 3.5 Haiku is the next generation of our fastest model. Matching the speed of Claude 3 Haiku, it improves across every skill set and surpasses our previous largest model Claude 3 Opus on many intelligence benchmarks."
},
"anthropic/claude-3.5-sonnet": {
"description": "Claude 3.5 Sonnet strikes an ideal balance between intelligence and speed—especially for enterprise workloads. It delivers powerful performance at lower cost compared to peers and is designed for high durability in large-scale AI deployments."
},
"anthropic/claude-3.7-sonnet": {
"description": "Claude 3.7 Sonnet is the first hybrid reasoning model and Anthropic's smartest model to date. It offers state-of-the-art performance in coding, content generation, data analysis, and planning tasks, building on the software engineering and computer usage capabilities of its predecessor Claude 3.5 Sonnet."
},
"anthropic/claude-opus-4": {
"description": "Claude Opus 4 is Anthropic's most powerful model yet and the world's best coding model, leading on SWE-bench (72.5%) and Terminal-bench (43.2%). It provides sustained performance for long-term tasks requiring focused effort and thousands of steps, capable of continuous operation for hours—significantly extending AI agent capabilities."
},
"anthropic/claude-opus-4.1": {
"description": "Claude Opus 4.1 is a plug-and-play alternative to Opus 4, delivering excellent performance and accuracy for practical coding and agent tasks. Opus 4.1 advances state-of-the-art coding performance to 74.5% on SWE-bench Verified, handling complex multi-step problems with greater rigor and attention to detail."
},
"anthropic/claude-sonnet-4": {
"description": "Claude Sonnet 4 significantly improves upon the industry-leading capabilities of Sonnet 3.7, excelling in coding with state-of-the-art 72.7% on SWE-bench. The model balances performance and efficiency, suitable for both internal and external use cases, and offers enhanced controllability for greater command over outcomes."
},
"anthropic/claude-sonnet-4.5": {
"description": "Claude Sonnet 4.5 is Anthropic's most intelligent model to date."
},
"ascend-tribe/pangu-pro-moe": {
"description": "Pangu-Pro-MoE 72B-A16B is a sparse large language model with 72 billion parameters and 16 billion activated parameters. It is based on the Group Mixture of Experts (MoGE) architecture, which groups experts during the expert selection phase and constrains tokens to activate an equal number of experts within each group, achieving expert load balancing and significantly improving deployment efficiency on the Ascend platform."
},
"aya": {
"description": "Aya 23 is a multilingual model launched by Cohere, supporting 23 languages, facilitating diverse language applications."
},
"aya:35b": {
"description": "Aya 23 is a multilingual model launched by Cohere, supporting 23 languages, facilitating diverse language applications."
},
"azure-DeepSeek-R1-0528": {
"description": "Deployed and provided by Microsoft; the DeepSeek R1 model has undergone a minor version upgrade, currently at DeepSeek-R1-0528. In the latest update, DeepSeek R1 significantly improves inference depth and reasoning ability by increasing computational resources and introducing algorithmic optimizations in the post-training phase. This model excels in benchmarks including mathematics, programming, and general logic, with overall performance approaching leading models such as O3 and Gemini 2.5 Pro."
},
"baichuan-m2-32b": {
"description": "Baichuan M2 32B is a Mixture of Experts model developed by Baichuan Intelligence, featuring powerful reasoning capabilities."
},
"baichuan/baichuan2-13b-chat": {
"description": "Baichuan-13B is an open-source, commercially usable large language model developed by Baichuan Intelligence, containing 13 billion parameters, achieving the best results in its size on authoritative Chinese and English benchmarks."
},
"baidu/ERNIE-4.5-300B-A47B": {
"description": "ERNIE-4.5-300B-A47B is a large language model developed by Baidu based on a Mixture of Experts (MoE) architecture. The model has a total of 300 billion parameters, but only activates 47 billion parameters per token during inference, balancing powerful performance with computational efficiency. As a core model in the ERNIE 4.5 series, it demonstrates outstanding capabilities in text understanding, generation, reasoning, and programming tasks. The model employs an innovative multimodal heterogeneous MoE pretraining method, jointly training text and visual modalities to effectively enhance overall capabilities, especially excelling in instruction following and world knowledge retention."
},
"c4ai-aya-expanse-32b": {
"description": "Aya Expanse is a high-performance 32B multilingual model designed to challenge the performance of single-language models through innovations in instruction tuning, data arbitrage, preference training, and model merging. It supports 23 languages."
},
"c4ai-aya-expanse-8b": {
"description": "Aya Expanse is a high-performance 8B multilingual model designed to challenge the performance of single-language models through innovations in instruction tuning, data arbitrage, preference training, and model merging. It supports 23 languages."
},
"c4ai-aya-vision-32b": {
"description": "Aya Vision is a state-of-the-art multimodal model that excels in multiple key benchmarks for language, text, and image capabilities. This 32 billion parameter version focuses on cutting-edge multilingual performance and supports 23 languages."
},
"c4ai-aya-vision-8b": {
"description": "Aya Vision is a state-of-the-art multimodal model that excels in multiple key benchmarks for language, text, and image capabilities. This 8 billion parameter version focuses on low latency and optimal performance."
},
"charglm-3": {
"description": "CharGLM-3 is designed for role-playing and emotional companionship, supporting ultra-long multi-turn memory and personalized dialogue, with wide applications."
},
"charglm-4": {
"description": "CharGLM-4 is designed for role-playing and emotional companionship, supporting ultra-long multi-turn memory and personalized dialogue, with wide-ranging applications."
},
"chatgpt-4o-latest": {
"description": "ChatGPT-4o is a dynamic model that updates in real-time to stay current with the latest version. It combines powerful language understanding and generation capabilities, making it suitable for large-scale applications, including customer service, education, and technical support."
},
"claude-2.0": {
"description": "Claude 2 provides advancements in key capabilities for enterprises, including industry-leading 200K token context, significantly reducing the occurrence of model hallucinations, system prompts, and a new testing feature: tool invocation."
},
"claude-2.1": {
"description": "Claude 2 provides advancements in key capabilities for enterprises, including industry-leading 200K token context, significantly reducing the occurrence of model hallucinations, system prompts, and a new testing feature: tool invocation."
},
"claude-3-5-haiku-20241022": {
"description": "Claude 3.5 Haiku is Anthropic's fastest next-generation model. Compared to Claude 3 Haiku, Claude 3.5 Haiku has improved in various skills and has surpassed the previous generation's largest model, Claude 3 Opus, in many intelligence benchmark tests."
},
"claude-3-5-haiku-latest": {
"description": "Claude 3.5 Haiku offers fast responses, ideal for lightweight tasks."
},
"claude-3-5-sonnet-20240620": {
"description": "Claude 3.5 Sonnet offers capabilities that surpass Opus and faster speeds than Sonnet, while maintaining the same price as Sonnet. Sonnet excels particularly in programming, data science, visual processing, and agent tasks."
},
"claude-3-5-sonnet-20241022": {
"description": "Claude 3.5 Sonnet offers capabilities that surpass Opus and faster speeds than Sonnet, while maintaining the same pricing as Sonnet. Sonnet excels particularly in programming, data science, visual processing, and agent tasks."
},
"claude-3-7-sonnet-20250219": {
"description": "Claude 3.7 Sonnet is Anthropic's latest model, offering a balance of speed and performance. It excels in a wide range of tasks, including programming, data science, visual processing, and agent tasks."
},
"claude-3-7-sonnet-latest": {
"description": "Claude 3.7 Sonnet is Anthropic's latest and most powerful model for handling highly complex tasks. It excels in performance, intelligence, fluency, and comprehension."
},
"claude-3-haiku-20240307": {
"description": "Claude 3 Haiku is Anthropic's fastest and most compact model, designed for near-instantaneous responses. It features rapid and accurate directional performance."
},
"claude-3-opus-20240229": {
"description": "Claude 3 Opus is Anthropic's most powerful model for handling highly complex tasks. It excels in performance, intelligence, fluency, and comprehension."
},
"claude-3-sonnet-20240229": {
"description": "Claude 3 Sonnet provides an ideal balance of intelligence and speed for enterprise workloads. It offers maximum utility at a lower price, reliable and suitable for large-scale deployment."
},
"claude-haiku-4-5-20251001": {
"description": "Claude Haiku 4.5 is Anthropic's fastest and most intelligent Haiku model, offering lightning-fast speed and advanced reasoning capabilities."
},
"claude-opus-4-1-20250805": {
"description": "Claude Opus 4.1 is Anthropic's latest and most powerful model designed for handling highly complex tasks. It demonstrates outstanding performance in intelligence, fluency, and comprehension."
},
"claude-opus-4-1-20250805-thinking": {
"description": "Claude Opus 4.1 Thinking model, an advanced version capable of demonstrating its reasoning process."
},
"claude-opus-4-20250514": {
"description": "Claude Opus 4 is Anthropic's most powerful model for handling highly complex tasks. It excels in performance, intelligence, fluency, and comprehension."
},
"claude-sonnet-4-20250514": {
"description": "Claude Sonnet 4 can generate near-instant responses or extended step-by-step reasoning, allowing users to clearly observe these processes."
},
"claude-sonnet-4-20250514-thinking": {
"description": "Claude Sonnet 4 Thinking model can produce near-instant responses or extended step-by-step reasoning, enabling users to clearly see these processes."
},
"claude-sonnet-4-5-20250929": {
"description": "Claude Sonnet 4.5 is Anthropic's most intelligent model to date."
},
"codegeex-4": {
"description": "CodeGeeX-4 is a powerful AI programming assistant that supports intelligent Q&A and code completion in various programming languages, enhancing development efficiency."
},
"codegeex4-all-9b": {
"description": "CodeGeeX4-ALL-9B is a multilingual code generation model that supports comprehensive functions including code completion and generation, code interpretation, web search, function calls, and repository-level code Q&A, covering various scenarios in software development. It is a top-tier code generation model with fewer than 10B parameters."
},
"codegemma": {
"description": "CodeGemma is a lightweight language model dedicated to various programming tasks, supporting rapid iteration and integration."
},
"codegemma:2b": {
"description": "CodeGemma is a lightweight language model dedicated to various programming tasks, supporting rapid iteration and integration."
},
"codellama": {
"description": "Code Llama is an LLM focused on code generation and discussion, combining extensive programming language support, suitable for developer environments."
},
"codellama/CodeLlama-34b-Instruct-hf": {
"description": "Code Llama is an LLM focused on code generation and discussion, with extensive support for various programming languages, suitable for developer environments."
},
"codellama:13b": {
"description": "Code Llama is an LLM focused on code generation and discussion, combining extensive programming language support, suitable for developer environments."
},
"codellama:34b": {
"description": "Code Llama is an LLM focused on code generation and discussion, combining extensive programming language support, suitable for developer environments."
},
"codellama:70b": {
"description": "Code Llama is an LLM focused on code generation and discussion, combining extensive programming language support, suitable for developer environments."
},
"codeqwen": {
"description": "CodeQwen1.5 is a large language model trained on extensive code data, specifically designed to solve complex programming tasks."
},
"codestral": {
"description": "Codestral is Mistral AI's first code model, providing excellent support for code generation tasks."
},
"codestral-latest": {
"description": "Codestral is a cutting-edge generative model focused on code generation, optimized for intermediate filling and code completion tasks."
},
"codex-mini-latest": {
"description": "codex-mini-latest is a fine-tuned version of o4-mini, specifically designed for Codex CLI. For direct API usage, we recommend starting with gpt-4.1."
},
"cogview-4": {
"description": "CogView-4 is Zhipu's first open-source text-to-image model supporting Chinese character generation. It offers comprehensive improvements in semantic understanding, image generation quality, and bilingual Chinese-English text generation capabilities. It supports bilingual input of any length and can generate images at any resolution within a specified range."
},
"cohere-command-r": {
"description": "Command R is a scalable generative model targeting RAG and Tool Use to enable production-scale AI for enterprises."
},
"cohere-command-r-plus": {
"description": "Command R+ is a state-of-the-art RAG-optimized model designed to tackle enterprise-grade workloads."
},
"cohere/Cohere-command-r": {
"description": "Command R is a scalable generative model designed for RAG and tool usage, enabling enterprises to achieve production-grade AI."
},
"cohere/Cohere-command-r-plus": {
"description": "Command R+ is a state-of-the-art RAG-optimized model designed to handle enterprise-level workloads."
},
"cohere/command-a": {
"description": "Command A is Cohere's most powerful model to date, excelling in tool use, agents, retrieval-augmented generation (RAG), and multilingual use cases. With a context length of 256K, it runs on just two GPUs and achieves 150% higher throughput compared to Command R+ 08-2024."
},
"cohere/command-r": {
"description": "Command R is a large language model optimized for conversational interactions and long-context tasks. Positioned in the \"scalable\" category, it balances high performance and strong accuracy, enabling companies to move beyond proof of concept into production."
},
"cohere/command-r-plus": {
"description": "Command R+ is Cohere's latest large language model optimized for conversational interactions and long-context tasks. It aims for exceptional performance, enabling companies to transition from proof of concept to production."
},
"cohere/embed-v4.0": {
"description": "A model that enables classification or embedding transformation of text, images, or mixed content."
},
"comfyui/flux-dev": {
"description": "FLUX.1 Dev - High-quality text-to-image model, generates in 10–50 steps, ideal for high-end creative and artistic image generation."
},
"comfyui/flux-kontext-dev": {
"description": "FLUX.1 Kontext-dev - Image editing model that supports modifying existing images based on text instructions, including localized edits and style transfer."
},
"comfyui/flux-krea-dev": {
"description": "FLUX.1 Krea-dev - Enhanced safety text-to-image model developed in collaboration with Krea, featuring built-in safety filters."
},
"comfyui/flux-schnell": {
"description": "FLUX.1 Schnell - Ultra-fast text-to-image model capable of generating high-quality images in just 1–4 steps, ideal for real-time applications and rapid prototyping."
},
"comfyui/stable-diffusion-15": {
"description": "Stable Diffusion 1.5 text-to-image model, classic 512x512 resolution generation, suitable for quick prototyping and creative experimentation."
},
"comfyui/stable-diffusion-35": {
"description": "Stable Diffusion 3.5 next-generation text-to-image model, available in Large and Medium versions, requires external CLIP encoder files, delivers exceptional image quality and prompt alignment."
},
"comfyui/stable-diffusion-35-inclclip": {
"description": "Stable Diffusion 3.5 with built-in CLIP/T5 encoders, no need for external encoder files, compatible with models like sd3.5_medium_incl_clips, optimized for lower resource usage."
},
"comfyui/stable-diffusion-custom": {
"description": "Custom SD text-to-image model. Please name the model file as custom_sd_lobe.safetensors. If using a VAE, name it custom_sd_vae_lobe.safetensors. Model files must be placed in the designated folder as required by Comfy."
},
"comfyui/stable-diffusion-custom-refiner": {
"description": "Custom SDXL image-to-image model. Please name the model file as custom_sd_lobe.safetensors. If using a VAE, name it custom_sd_vae_lobe.safetensors. Model files must be placed in the designated folder as required by Comfy."
},
"comfyui/stable-diffusion-refiner": {
"description": "SDXL image-to-image model for high-quality transformations based on input images, supporting style transfer, image restoration, and creative modifications."
},
"comfyui/stable-diffusion-xl": {
"description": "SDXL text-to-image model supporting high-resolution 1024x1024 generation, offering superior image quality and detail rendering."
},
"command": {
"description": "An instruction-following dialogue model that delivers high quality and reliability in language tasks, with a longer context length compared to our base generation models."
},
"command-a-03-2025": {
"description": "Command A is our most powerful model to date, excelling in tool usage, agent tasks, retrieval-augmented generation (RAG), and multilingual applications. Command A features a context length of 256K and can run on just two GPUs, achieving a 150% increase in throughput compared to Command R+ 08-2024."
},
"command-light": {
"description": "A smaller, faster version of Command that is nearly as powerful but operates at a higher speed."
},
"command-light-nightly": {
"description": "To shorten the time interval between major version releases, we have launched nightly versions of the Command model. For the command-light series, this version is called command-light-nightly. Please note that command-light-nightly is the latest, most experimental, and (potentially) unstable version. Nightly versions are updated regularly without prior notice, so they are not recommended for production use."
},
"command-nightly": {
"description": "To shorten the time interval between major version releases, we have launched nightly versions of the Command model. For the Command series, this version is called command-cightly. Please note that command-nightly is the latest, most experimental, and (potentially) unstable version. Nightly versions are updated regularly without prior notice, so they are not recommended for production use."
},
"command-r": {
"description": "Command R is an LLM optimized for dialogue and long context tasks, particularly suitable for dynamic interactions and knowledge management."
},
"command-r-03-2024": {
"description": "Command R is an instruction-following dialogue model that provides higher quality and reliability in language tasks, with a longer context length than previous models. It can be used for complex workflows such as code generation, retrieval-augmented generation (RAG), tool usage, and agent tasks."
},
"command-r-08-2024": {
"description": "command-r-08-2024 is an updated version of the Command R model, released in August 2024."
},
"command-r-plus": {
"description": "Command R+ is a high-performance large language model designed for real enterprise scenarios and complex applications."
},
"command-r-plus-04-2024": {
"description": "Command R+ is an instruction-following dialogue model that delivers higher quality and reliability in language tasks, with a longer context length than previous models. It is best suited for complex RAG workflows and multi-step tool usage."
},
"command-r-plus-08-2024": {
"description": "Command R+ is an instruction-following conversational model that delivers higher quality and reliability in language tasks, with a longer context length compared to previous models. It is best suited for complex RAG workflows and multi-step tool usage."
},
"command-r7b-12-2024": {
"description": "command-r7b-12-2024 is a compact and efficient updated version, released in December 2024. It excels in tasks requiring complex reasoning and multi-step processing, such as RAG, tool usage, and agent tasks."
},
"computer-use-preview": {
"description": "The computer-use-preview model is a dedicated model designed for \"computer usage tools,\" trained to understand and execute computer-related tasks."
},
"dall-e-2": {
"description": "The second generation DALL·E model, supporting more realistic and accurate image generation, with a resolution four times that of the first generation."
},
"dall-e-3": {
"description": "The latest DALL·E model, released in November 2023. It supports more realistic and accurate image generation with enhanced detail representation."
},
"databricks/dbrx-instruct": {
"description": "DBRX Instruct provides highly reliable instruction processing capabilities, supporting applications across multiple industries."
},
"deepseek-ai/DeepSeek-OCR": {
"description": "DeepSeek-OCR is a vision-language model developed by DeepSeek AI, focused on Optical Character Recognition (OCR) and 'contextual optical compression.' The model explores the limits of compressing contextual information from images and efficiently processes documents into structured text formats such as Markdown. It accurately recognizes textual content within images, making it particularly suitable for document digitization, text extraction, and structured data processing applications."
},
"deepseek-ai/DeepSeek-R1": {
"description": "DeepSeek-R1 is a reinforcement learning (RL) driven inference model that addresses issues of repetitiveness and readability within the model. Prior to RL, DeepSeek-R1 introduced cold start data to further optimize inference performance. It performs comparably to OpenAI-o1 in mathematical, coding, and reasoning tasks, and enhances overall effectiveness through meticulously designed training methods."
},
"deepseek-ai/DeepSeek-R1-0528": {
"description": "DeepSeek R1 significantly enhances its reasoning and inference depth by leveraging increased computational resources and introducing algorithmic optimizations during post-training. The model performs excellently across various benchmarks, including mathematics, programming, and general logic. Its overall performance now approaches leading models such as O3 and Gemini 2.5 Pro."
},
"deepseek-ai/DeepSeek-R1-0528-Qwen3-8B": {
"description": "DeepSeek-R1-0528-Qwen3-8B is a model distilled from DeepSeek-R1-0528's chain of thought into Qwen3 8B Base. It achieves state-of-the-art (SOTA) performance among open-source models, surpassing Qwen3 8B by 10% in the AIME 2024 test and reaching the performance level of Qwen3-235B-thinking. The model excels in mathematics reasoning, programming, and general logic benchmarks. It shares the same architecture as Qwen3-8B but uses the tokenizer configuration from DeepSeek-R1-0528."
},
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B": {
"description": "The DeepSeek-R1 distillation model optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": {
"description": "The DeepSeek-R1 distillation model optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": {
"description": "The DeepSeek-R1 distillation model optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": {
"description": "DeepSeek-R1-Distill-Qwen-32B is a model obtained through knowledge distillation based on Qwen2.5-32B. This model is fine-tuned using 800,000 selected samples generated by DeepSeek-R1, demonstrating exceptional performance in mathematics, programming, and reasoning across multiple domains. It has achieved excellent results in various benchmark tests, including a 94.3% accuracy rate on MATH-500, showcasing strong mathematical reasoning capabilities."
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
"description": "DeepSeek-R1-Distill-Qwen-7B is a model obtained through knowledge distillation based on Qwen2.5-Math-7B. This model is fine-tuned using 800,000 selected samples generated by DeepSeek-R1, demonstrating excellent reasoning capabilities. It has performed outstandingly in multiple benchmark tests, achieving a 92.8% accuracy rate on MATH-500, a 55.5% pass rate on AIME 2024, and a score of 1189 on CodeForces, showcasing strong mathematical and programming abilities as a 7B scale model."
},
"deepseek-ai/DeepSeek-V2.5": {
"description": "DeepSeek V2.5 combines the excellent features of previous versions, enhancing general and coding capabilities."
},
"deepseek-ai/DeepSeek-V3": {
"description": "DeepSeek-V3 is a mixture of experts (MoE) language model with 671 billion parameters, utilizing multi-head latent attention (MLA) and the DeepSeekMoE architecture, combined with a load balancing strategy that does not rely on auxiliary loss, optimizing inference and training efficiency. Pre-trained on 14.8 trillion high-quality tokens and fine-tuned with supervision and reinforcement learning, DeepSeek-V3 outperforms other open-source models and approaches leading closed-source models in performance."
},
"deepseek-ai/DeepSeek-V3.1": {
"description": "The DeepSeek V3.1 model features a hybrid reasoning architecture that supports both reasoning and non-reasoning modes."
},
"deepseek-ai/DeepSeek-V3.1-Terminus": {
"description": "DeepSeek-V3.1-Terminus is an updated version of the V3.1 model released by DeepSeek, positioned as a hybrid agent large language model. This update focuses on fixing user-reported issues and improving stability while maintaining the model's original capabilities. It significantly enhances language consistency, reducing the mixing of Chinese and English and the occurrence of abnormal characters. The model integrates both \"Thinking Mode\" and \"Non-thinking Mode,\" allowing users to switch flexibly between chat templates to suit different tasks. As a key optimization, V3.1-Terminus improves the performance of the Code Agent and Search Agent, making tool invocation and multi-step complex task execution more reliable."
},
"deepseek-ai/DeepSeek-V3.2-Exp": {
"description": "DeepSeek-V3.2-Exp is an experimental version released by DeepSeek as an intermediate step toward the next-generation architecture. Building on V3.1-Terminus, it introduces the DeepSeek Sparse Attention (DSA) mechanism to enhance training and inference efficiency for long-context scenarios. It features targeted optimizations for tool use, long-document comprehension, and multi-step reasoning. V3.2-Exp serves as a bridge between research and production, ideal for users seeking higher inference efficiency in high-context-budget applications."
},
"deepseek-ai/deepseek-llm-67b-chat": {
"description": "DeepSeek 67B is an advanced model trained for highly complex conversations."
},
"deepseek-ai/deepseek-r1": {
"description": "A state-of-the-art efficient LLM skilled in reasoning, mathematics, and programming."
|
||
},
|
||
"deepseek-ai/deepseek-v3.1": {
|
||
"description": "DeepSeek V3.1: The next-generation reasoning model that enhances complex reasoning and chain-of-thought capabilities, suitable for tasks requiring in-depth analysis."
|
||
},
|
||
"deepseek-ai/deepseek-v3.1-terminus": {
|
||
"description": "DeepSeek V3.1: A next-generation reasoning model designed to enhance complex reasoning and chain-of-thought capabilities, ideal for tasks requiring in-depth analysis."
|
||
},
|
||
"deepseek-ai/deepseek-vl2": {
|
||
"description": "DeepSeek-VL2 is a mixture of experts (MoE) visual language model developed based on DeepSeekMoE-27B, employing a sparsely activated MoE architecture that achieves outstanding performance while activating only 4.5 billion parameters. This model excels in various tasks, including visual question answering, optical character recognition, document/table/chart understanding, and visual localization."
|
||
},
|
||
"deepseek-chat": {
|
||
"description": "A new open-source model that integrates general and coding capabilities, retaining the general conversational abilities of the original Chat model and the powerful code handling capabilities of the Coder model, while better aligning with human preferences. Additionally, DeepSeek-V2.5 has achieved significant improvements in writing tasks, instruction following, and more."
|
||
},
|
||
"deepseek-coder-33B-instruct": {
|
||
"description": "DeepSeek Coder 33B is a code language model trained on 20 trillion data points, of which 87% are code and 13% are in Chinese and English. The model introduces a 16K window size and fill-in-the-blank tasks, providing project-level code completion and snippet filling capabilities."
|
||
},
|
||
"deepseek-coder-v2": {
|
||
"description": "DeepSeek Coder V2 is an open-source hybrid expert code model that performs excellently in coding tasks, comparable to GPT4-Turbo."
|
||
},
|
||
"deepseek-coder-v2:236b": {
|
||
"description": "DeepSeek Coder V2 is an open-source hybrid expert code model that performs excellently in coding tasks, comparable to GPT4-Turbo."
|
||
},
|
||
"deepseek-r1": {
|
||
"description": "DeepSeek-R1 is a reinforcement learning (RL) driven inference model that addresses issues of repetitiveness and readability within the model. Prior to RL, DeepSeek-R1 introduced cold start data to further optimize inference performance. It performs comparably to OpenAI-o1 in mathematical, coding, and reasoning tasks, and enhances overall effectiveness through meticulously designed training methods."
|
||
},
|
||
"deepseek-r1-0528": {
|
||
"description": "The full-capacity 685B model released on May 28, 2025. DeepSeek-R1 extensively employs reinforcement learning during post-training, significantly enhancing reasoning capabilities with minimal labeled data. It demonstrates strong performance in mathematics, coding, and natural language reasoning tasks."
|
||
},
|
||
"deepseek-r1-70b-fast-online": {
|
||
"description": "DeepSeek R1 70B fast version, supporting real-time online search, providing faster response times while maintaining model performance."
|
||
},
|
||
"deepseek-r1-70b-online": {
|
||
"description": "DeepSeek R1 70B standard version, supporting real-time online search, suitable for dialogue and text processing tasks that require the latest information."
|
||
},
|
||
"deepseek-r1-distill-llama": {
|
||
"description": "deepseek-r1-distill-llama is a model distilled from DeepSeek-R1 based on Llama."
|
||
},
|
||
"deepseek-r1-distill-llama-70b": {
|
||
"description": "DeepSeek R1—the larger and smarter model in the DeepSeek suite—has been distilled into the Llama 70B architecture. Based on benchmark tests and human evaluations, this model is smarter than the original Llama 70B, especially excelling in tasks requiring mathematical and factual accuracy."
|
||
},
|
||
"deepseek-r1-distill-llama-8b": {
|
||
"description": "The DeepSeek-R1-Distill series models are fine-tuned versions of samples generated by DeepSeek-R1, using knowledge distillation techniques on open-source models like Qwen and Llama."
|
||
},
|
||
"deepseek-r1-distill-qianfan-llama-70b": {
|
||
"description": "First released on February 14, 2025, distilled by the Qianfan model development team using Llama3_70B as the base model (Built with Meta Llama), with Qianfan's corpus also added to the distilled data."
|
||
},
|
||
"deepseek-r1-distill-qianfan-llama-8b": {
|
||
"description": "First released on February 14, 2025, distilled by the Qianfan model development team using Llama3_8B as the base model (Built with Meta Llama), with Qianfan's corpus also added to the distilled data."
|
||
},
|
||
"deepseek-r1-distill-qwen": {
|
||
"description": "deepseek-r1-distill-qwen is a model distilled from DeepSeek-R1 based on Qwen."
|
||
},
|
||
"deepseek-r1-distill-qwen-1.5b": {
|
||
"description": "The DeepSeek-R1-Distill series models are fine-tuned versions of samples generated by DeepSeek-R1, using knowledge distillation techniques on open-source models like Qwen and Llama."
|
||
},
|
||
"deepseek-r1-distill-qwen-14b": {
|
||
"description": "The DeepSeek-R1-Distill series models are fine-tuned versions of samples generated by DeepSeek-R1, using knowledge distillation techniques on open-source models like Qwen and Llama."
|
||
},
|
||
"deepseek-r1-distill-qwen-32b": {
|
||
"description": "The DeepSeek-R1-Distill series models are fine-tuned versions of samples generated by DeepSeek-R1, using knowledge distillation techniques on open-source models like Qwen and Llama."
|
||
},
|
||
"deepseek-r1-distill-qwen-7b": {
|
||
"description": "The DeepSeek-R1-Distill series models are fine-tuned versions of samples generated by DeepSeek-R1, using knowledge distillation techniques on open-source models like Qwen and Llama."
|
||
},
|
||
"deepseek-r1-fast-online": {
|
||
"description": "DeepSeek R1 full fast version, supporting real-time online search, combining the powerful capabilities of 671B parameters with faster response times."
|
||
},
|
||
"deepseek-r1-online": {
|
||
"description": "DeepSeek R1 full version, with 671B parameters, supporting real-time online search, offering enhanced understanding and generation capabilities."
|
||
},
|
||
"deepseek-reasoner": {
|
||
"description": "DeepSeek V3.2 Thinking Mode. Before outputting the final answer, the model first generates a chain of thought to improve the accuracy of the final response."
|
||
},
|
||
"deepseek-v2": {
|
||
"description": "DeepSeek V2 is an efficient Mixture-of-Experts language model, suitable for cost-effective processing needs."
|
||
},
|
||
"deepseek-v2:236b": {
|
||
"description": "DeepSeek V2 236B is the design code model of DeepSeek, providing powerful code generation capabilities."
|
||
},
|
||
"deepseek-v3": {
|
||
"description": "DeepSeek-V3 is a MoE model developed by Hangzhou DeepSeek Artificial Intelligence Technology Research Co., Ltd., achieving outstanding results in multiple evaluations and ranking first among open-source models on mainstream leaderboards. Compared to the V2.5 model, V3 has achieved a threefold increase in generation speed, providing users with a faster and smoother experience."
|
||
},
|
||
"deepseek-v3-0324": {
|
||
"description": "DeepSeek-V3-0324 is a 671B parameter MoE model, excelling in programming and technical capabilities, contextual understanding, and long text processing."
|
||
},
|
||
"deepseek-v3.1": {
|
||
"description": "DeepSeek-V3.1 is a newly launched hybrid reasoning model by DeepSeek, supporting two reasoning modes: thinking and non-thinking. It offers higher thinking efficiency compared to DeepSeek-R1-0528. With post-training optimization, the use of Agent tools and agent task performance have been significantly enhanced. It supports a 128k context window and an output length of up to 64k tokens."
|
||
},
|
||
"deepseek-v3.1-terminus": {
|
||
"description": "DeepSeek-V3.1-Terminus is an optimized large language model developed by DeepSeek, specifically tailored for terminal devices."
|
||
},
|
||
"deepseek-v3.1:671b": {
|
||
"description": "DeepSeek V3.1: The next-generation reasoning model that enhances complex reasoning and chain-of-thought capabilities, suitable for tasks requiring in-depth analysis."
|
||
},
|
||
"deepseek-v3.2-exp": {
|
||
"description": "deepseek-v3.2-exp introduces a sparse attention mechanism designed to enhance training and inference efficiency when processing long texts, priced lower than deepseek-v3.1."
|
||
},
|
||
"deepseek/deepseek-chat-v3-0324": {
|
||
"description": "DeepSeek V3 is a 685B parameter expert mixture model, the latest iteration in the DeepSeek team's flagship chat model series.\n\nIt inherits from the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs excellently across various tasks."
|
||
},
|
||
"deepseek/deepseek-chat-v3-0324:free": {
|
||
"description": "DeepSeek V3 is a 685B parameter expert mixture model, the latest iteration in the DeepSeek team's flagship chat model series.\n\nIt inherits from the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs excellently across various tasks."
|
||
},
|
||
"deepseek/deepseek-chat-v3.1": {
|
||
"description": "DeepSeek-V3.1 is a large hybrid reasoning model supporting 128K long context and efficient mode switching, delivering outstanding performance and speed in tool invocation, code generation, and complex reasoning tasks."
|
||
},
|
||
"deepseek/deepseek-r1": {
|
||
"description": "The DeepSeek R1 model has undergone minor version upgrades, currently at DeepSeek-R1-0528. The latest update significantly enhances inference depth and capability by leveraging increased compute resources and post-training algorithmic optimizations. The model performs excellently on benchmarks in mathematics, programming, and general logic, with overall performance approaching leading models like O3 and Gemini 2.5 Pro."
|
||
},
|
||
"deepseek/deepseek-r1-0528": {
|
||
"description": "DeepSeek-R1 greatly improves model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first generates a chain of thought to enhance answer accuracy."
|
||
},
|
||
"deepseek/deepseek-r1-0528:free": {
|
||
"description": "DeepSeek-R1 greatly improves model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first generates a chain of thought to enhance answer accuracy."
|
||
},
|
||
"deepseek/deepseek-r1-distill-llama-70b": {
|
||
"description": "DeepSeek-R1-Distill-Llama-70B is a distilled, more efficient variant of the 70B Llama model. It maintains strong performance on text generation tasks while reducing computational overhead for easier deployment and research. Served by Groq using its custom Language Processing Unit (LPU) hardware for fast, efficient inference."
|
||
},
|
||
"deepseek/deepseek-r1-distill-llama-8b": {
|
||
"description": "DeepSeek R1 Distill Llama 8B is a distilled large language model based on Llama-3.1-8B-Instruct, trained using outputs from DeepSeek R1."
|
||
},
|
||
"deepseek/deepseek-r1-distill-qwen-14b": {
|
||
"description": "DeepSeek R1 Distill Qwen 14B is a distilled large language model based on Qwen 2.5 14B, trained using outputs from DeepSeek R1. This model has surpassed OpenAI's o1-mini in several benchmark tests, achieving state-of-the-art results for dense models. Here are some benchmark results:\nAIME 2024 pass@1: 69.7\nMATH-500 pass@1: 93.9\nCodeForces Rating: 1481\nThis model demonstrates competitive performance comparable to larger cutting-edge models through fine-tuning from DeepSeek R1 outputs."
|
||
},
|
||
"deepseek/deepseek-r1-distill-qwen-32b": {
|
||
"description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on Qwen 2.5 32B, trained using outputs from DeepSeek R1. This model has surpassed OpenAI's o1-mini in several benchmark tests, achieving state-of-the-art results for dense models. Here are some benchmark results:\nAIME 2024 pass@1: 72.6\nMATH-500 pass@1: 94.3\nCodeForces Rating: 1691\nThis model demonstrates competitive performance comparable to larger cutting-edge models through fine-tuning from DeepSeek R1 outputs."
|
||
},
|
||
"deepseek/deepseek-r1/community": {
|
||
"description": "DeepSeek R1 is the latest open-source model released by the DeepSeek team, featuring impressive inference performance, particularly in mathematics, programming, and reasoning tasks, reaching levels comparable to OpenAI's o1 model."
|
||
},
|
||
"deepseek/deepseek-r1:free": {
|
||
"description": "DeepSeek-R1 significantly enhances model reasoning capabilities with minimal labeled data. Before outputting the final answer, the model first provides a chain of thought to improve the accuracy of the final response."
|
||
},
|
||
"deepseek/deepseek-v3": {
|
||
"description": "A fast, general-purpose large language model with enhanced reasoning capabilities."
|
||
},
|
||
"deepseek/deepseek-v3.1-base": {
|
||
"description": "DeepSeek V3.1 Base is an improved version of the DeepSeek V3 model."
|
||
},
|
||
"deepseek/deepseek-v3/community": {
|
||
"description": "DeepSeek-V3 has achieved a significant breakthrough in inference speed compared to previous models. It ranks first among open-source models and can compete with the world's most advanced closed-source models. DeepSeek-V3 employs Multi-Head Latent Attention (MLA) and DeepSeekMoE architectures, which have been thoroughly validated in DeepSeek-V2. Additionally, DeepSeek-V3 introduces an auxiliary lossless strategy for load balancing and sets multi-label prediction training objectives for enhanced performance."
|
||
},
|
||
"deepseek_r1": {
|
||
"description": "DeepSeek-R1 is a reinforcement learning (RL) driven reasoning model that addresses issues of repetition and readability within the model. Prior to RL, DeepSeek-R1 introduced cold start data to further optimize inference performance. It performs comparably to OpenAI-o1 in mathematics, coding, and reasoning tasks, and enhances overall effectiveness through carefully designed training methods."
|
||
},
|
||
"deepseek_r1_distill_llama_70b": {
|
||
"description": "DeepSeek-R1-Distill-Llama-70B is a model obtained through distillation training based on Llama-3.3-70B-Instruct. This model is part of the DeepSeek-R1 series and showcases excellent performance in mathematics, programming, and reasoning through fine-tuning with samples generated by DeepSeek-R1."
|
||
},
|
||
"deepseek_r1_distill_qwen_14b": {
|
||
"description": "DeepSeek-R1-Distill-Qwen-14B is a model derived from Qwen2.5-14B through knowledge distillation. This model is fine-tuned using 800,000 curated samples generated by DeepSeek-R1, showcasing excellent reasoning capabilities."
|
||
},
|
||
"deepseek_r1_distill_qwen_32b": {
|
||
"description": "DeepSeek-R1-Distill-Qwen-32B is a model derived from Qwen2.5-32B through knowledge distillation. This model is fine-tuned using 800,000 curated samples generated by DeepSeek-R1, demonstrating outstanding performance across multiple domains such as mathematics, programming, and reasoning."
|
||
},
|
||
"doubao-1.5-lite-32k": {
|
||
"description": "Doubao-1.5-lite is a new generation lightweight model, offering extreme response speed with performance and latency at a world-class level."
|
||
},
|
||
"doubao-1.5-pro-256k": {
|
||
"description": "Doubao-1.5-pro-256k is an upgraded version of Doubao-1.5-Pro, significantly enhancing overall performance by 10%. It supports reasoning with a 256k context window and an output length of up to 12k tokens. With higher performance, a larger window, and exceptional cost-effectiveness, it is suitable for a wider range of applications."
|
||
},
|
||
"doubao-1.5-pro-32k": {
|
||
"description": "Doubao-1.5-pro is a new generation flagship model with comprehensive performance upgrades, excelling in knowledge, coding, reasoning, and more."
|
||
},
|
||
"doubao-1.5-thinking-pro": {
|
||
"description": "Doubao-1.5 is a brand new deep thinking model that excels in specialized fields such as mathematics, programming, and scientific reasoning, as well as in general tasks like creative writing. It has achieved or is close to the top tier of industry standards in several authoritative benchmarks, including AIME 2024, Codeforces, and GPQA. It supports a 128k context window and 16k output."
|
||
},
|
||
"doubao-1.5-thinking-pro-m": {
|
||
"description": "Doubao-1.5 is a brand-new deep thinking model (the m version comes with native multimodal deep reasoning capabilities). It performs outstandingly in specialized fields such as mathematics, programming, scientific reasoning, as well as general tasks like creative writing. It achieves or approaches top-tier industry benchmarks on AIME 2024, Codeforces, GPQA, and more. Supports a 128k context window and 16k output."
|
||
},
|
||
"doubao-1.5-thinking-vision-pro": {
|
||
"description": "A new visual deep thinking model with enhanced general multimodal understanding and reasoning capabilities, achieving state-of-the-art (SOTA) results on 37 out of 59 public evaluation benchmarks."
|
||
},
|
||
"doubao-1.5-ui-tars": {
|
||
"description": "Doubao-1.5-UI-TARS is a native agent model designed for graphical user interface (GUI) interaction. It seamlessly interacts with GUIs through human-like abilities such as perception, reasoning, and action."
|
||
},
|
||
"doubao-1.5-vision-lite": {
|
||
"description": "Doubao-1.5-vision-lite is a newly upgraded multimodal large model that supports image recognition at any resolution and extreme aspect ratios, enhancing visual reasoning, document recognition, detail comprehension, and instruction following capabilities. It supports a context window of 128k and an output length of up to 16k tokens."
|
||
},
|
||
"doubao-1.5-vision-pro": {
|
||
"description": "Doubao-1.5-vision-pro is a newly upgraded multimodal large model supporting image recognition at any resolution and extreme aspect ratios. It enhances visual reasoning, document recognition, detailed information understanding, and instruction compliance."
|
||
},
|
||
"doubao-1.5-vision-pro-32k": {
|
||
"description": "Doubao-1.5-vision-pro is a newly upgraded multimodal large model supporting image recognition at any resolution and extreme aspect ratios. It enhances visual reasoning, document recognition, detailed information understanding, and instruction compliance."
|
||
},
|
||
"doubao-lite-128k": {
|
||
"description": "Offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 128k context window."
|
||
},
|
||
"doubao-lite-32k": {
|
||
"description": "Offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 32k context window."
|
||
},
|
||
"doubao-lite-4k": {
|
||
"description": "Offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 4k context window."
|
||
},
|
||
"doubao-pro-256k": {
|
||
"description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 256k context window."
|
||
},
|
||
"doubao-pro-32k": {
|
||
"description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 32k context window."
|
||
},
|
||
"doubao-seed-1.6": {
|
||
"description": "Doubao-Seed-1.6 is a brand-new multimodal deep thinking model supporting auto, thinking, and non-thinking modes. In non-thinking mode, its performance significantly surpasses Doubao-1.5-pro/250115. It supports a 256k context window and output lengths up to 16k tokens."
|
||
},
|
||
"doubao-seed-1.6-flash": {
|
||
"description": "Doubao-Seed-1.6-flash is an ultra-fast multimodal deep thinking model with TPOT inference speed as low as 10ms; it supports both text and visual understanding. Its text comprehension exceeds the previous lite generation, and its visual understanding rivals competitor pro series models. It supports a 256k context window and output lengths up to 16k tokens."
|
||
},
|
||
"doubao-seed-1.6-lite": {
|
||
"description": "Doubao-Seed-1.6-lite is a new multimodal deep reasoning model with adjustable reasoning effort—Minimal, Low, Medium, and High. It offers exceptional cost-performance and is an ideal choice for common tasks, supporting context windows up to 256k."
|
||
},
|
||
"doubao-seed-1.6-thinking": {
|
||
"description": "Doubao-Seed-1.6-thinking features greatly enhanced thinking capabilities. Compared to Doubao-1.5-thinking-pro, it further improves foundational skills such as coding, math, and logical reasoning, and supports visual understanding. It supports a 256k context window and output lengths up to 16k tokens."
|
||
},
|
||
"doubao-seed-1.6-vision": {
|
||
"description": "Doubao-Seed-1.6-vision is a visual deep thinking model that demonstrates stronger general multimodal understanding and reasoning capabilities in scenarios such as education, image review, inspection and security, and AI search Q&A. It supports a 256k context window and an output length of up to 64k tokens."
|
||
},
|
||
"doubao-seededit-3-0-i2i-250628": {
|
||
"description": "Doubao image generation model developed by ByteDance Seed team supports both text and image inputs, providing a highly controllable and high-quality image generation experience. Supports image editing via text instructions, generating images with dimensions between 512 and 1536 pixels."
|
||
},
|
||
"doubao-seedream-3-0-t2i-250415": {
|
||
"description": "Seedream 3.0 image generation model developed by ByteDance Seed team supports text and image inputs, delivering a highly controllable and high-quality image generation experience. Generates images based on text prompts."
|
||
},
|
||
"doubao-seedream-4-0-250828": {
|
||
"description": "Seedream 4.0 image generation model developed by ByteDance Seed team supports text and image inputs, offering a highly controllable and high-quality image generation experience. Generates images based on text prompts."
|
||
},
|
||
"doubao-vision-lite-32k": {
|
||
"description": "The Doubao-vision model is a multimodal large model launched by Doubao, featuring powerful image understanding and reasoning capabilities along with precise instruction comprehension. It demonstrates strong performance in image-text information extraction and image-based reasoning tasks, applicable to more complex and diverse visual question answering scenarios."
|
||
},
|
||
"doubao-vision-pro-32k": {
|
||
"description": "The Doubao-vision model is a multimodal large model launched by Doubao, featuring powerful image understanding and reasoning capabilities along with precise instruction comprehension. It demonstrates strong performance in image-text information extraction and image-based reasoning tasks, applicable to more complex and diverse visual question answering scenarios."
|
||
},
|
||
"emohaa": {
|
||
"description": "Emohaa is a psychological model with professional counseling capabilities, helping users understand emotional issues."
|
||
},
|
||
"ernie-3.5-128k": {
|
||
"description": "Baidu's flagship large-scale language model, covering a vast amount of Chinese and English corpus, possesses strong general capabilities to meet the requirements of most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
|
||
},
|
||
"ernie-3.5-8k": {
|
||
"description": "Baidu's flagship large-scale language model, covering a vast amount of Chinese and English corpus, possesses strong general capabilities to meet the requirements of most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
|
||
},
|
||
"ernie-3.5-8k-preview": {
|
||
"description": "Baidu's flagship large-scale language model, covering a vast amount of Chinese and English corpus, possesses strong general capabilities to meet the requirements of most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
|
||
},
|
||
"ernie-4.0-8k-latest": {
|
||
"description": "Baidu's flagship ultra-large-scale language model, which has achieved a comprehensive upgrade in model capabilities compared to ERNIE 3.5, widely applicable to complex task scenarios across various fields; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
|
||
},
|
||
"ernie-4.0-8k-preview": {
|
||
"description": "Baidu's flagship ultra-large-scale language model, which has achieved a comprehensive upgrade in model capabilities compared to ERNIE 3.5, widely applicable to complex task scenarios across various fields; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
|
||
},
|
||
"ernie-4.0-turbo-128k": {
|
||
"description": "Baidu's flagship ultra-large-scale language model, demonstrating outstanding overall performance, widely applicable to complex task scenarios across various fields; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It performs better than ERNIE 4.0 in terms of performance."
|
||
},
|
||
"ernie-4.0-turbo-8k-latest": {
|
||
"description": "Baidu's flagship ultra-large-scale language model, demonstrating outstanding overall performance, widely applicable to complex task scenarios across various fields; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It performs better than ERNIE 4.0 in terms of performance."
|
||
},
|
||
"ernie-4.0-turbo-8k-preview": {
|
||
"description": "Baidu's flagship ultra-large-scale language model, demonstrating outstanding overall performance, widely applicable to complex task scenarios across various fields; it supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It performs better than ERNIE 4.0 in terms of performance."
|
||
},
|
||
"ernie-4.5-21b-a3b": {
|
||
"description": "ERNIE 4.5 21B A3B is a Mixture of Experts model from Baidu's Wenxin series, offering strong reasoning and multilingual capabilities."
|
||
},
|
||
"ernie-4.5-300b-a47b": {
|
||
"description": "ERNIE 4.5 300B A47B is a large-scale Mixture of Experts model from Baidu's Wenxin series, delivering exceptional reasoning performance."
|
||
},
|
||
"ernie-4.5-8k-preview": {
|
||
"description": "ERNIE 4.5 is Baidu's self-developed next-generation native multimodal foundational model, achieving collaborative optimization through joint modeling of multiple modalities, with excellent multimodal understanding capabilities; it features enhanced language abilities, with significant improvements in understanding, generation, logic, and memory, as well as reduced hallucinations and improved logical reasoning and coding capabilities."
|
||
},
|
||
"ernie-4.5-turbo-128k": {
|
||
"description": "Wenxin 4.5 Turbo shows significant enhancements in reducing hallucinations, logical reasoning, and coding capabilities. Compared to Wenxin 4.5, it is faster and more cost-effective. The model's capabilities have been comprehensively improved to better meet the needs of multi-turn long history dialogue processing and long document understanding Q&A tasks."
|
||
},
|
||
"ernie-4.5-turbo-32k": {
|
||
"description": "Wenxin 4.5 Turbo has also shown significant enhancements in reducing hallucinations, logical reasoning, and coding capabilities. Compared to Wenxin 4.5, it is faster and more cost-effective. There are notable improvements in text creation and knowledge Q&A capabilities. The output length and sentence delay have increased compared to ERNIE 4.5."
|
||
},
|
||
"ernie-4.5-turbo-vl-32k": {
|
||
"description": "A brand new version of the Wenxin large model, with significant improvements in image understanding, creation, translation, and coding capabilities, now supports a context length of 32K for the first time, with a significant reduction in first token delay."
|
||
},
|
||
"ernie-char-8k": {
|
||
"description": "Baidu's vertical scene large language model, suitable for applications such as game NPCs, customer service dialogues, and role-playing conversations, with a more distinct and consistent character style, stronger instruction-following capabilities, and superior inference performance."
|
||
},
|
||
"ernie-char-fiction-8k": {
|
||
"description": "Baidu's vertical scene large language model, suitable for applications such as game NPCs, customer service dialogues, and role-playing conversations, with a more distinct and consistent character style, stronger instruction-following capabilities, and superior inference performance."
|
||
},
|
||
"ernie-irag-edit": {
|
||
"description": "Baidu's self-developed ERNIE iRAG Edit image editing model supports operations such as erase (object removal), repaint (object redrawing), and variation (variant generation) based on images."
|
||
},
|
||
"ernie-lite-8k": {
|
||
"description": "ERNIE Lite is Baidu's lightweight large language model, balancing excellent model performance with inference efficiency, suitable for low-power AI acceleration card inference."
|
||
},
|
||
"ernie-lite-pro-128k": {
|
||
"description": "Baidu's lightweight large language model, balancing excellent model performance with inference efficiency, offering better performance than ERNIE Lite, suitable for low-power AI acceleration card inference."
|
||
},
|
||
"ernie-novel-8k": {
|
||
"description": "Baidu's general-purpose large language model, which has a significant advantage in novel continuation capabilities and can also be used in short plays, movies, and other scenarios."
|
||
},
|
||
"ernie-speed-128k": {
|
||
"description": "Baidu's latest self-developed high-performance large language model released in 2024, with excellent general capabilities, suitable as a base model for fine-tuning to better address specific scenario issues while also demonstrating excellent inference performance."
|
||
},
|
||
"ernie-speed-pro-128k": {
|
||
"description": "Baidu's latest self-developed high-performance large language model released in 2024, with excellent general capabilities, offering better performance than ERNIE Speed, suitable as a base model for fine-tuning to better address specific scenario issues while also demonstrating excellent inference performance."
|
||
},
|
||
"ernie-tiny-8k": {
|
||
"description": "ERNIE Tiny is Baidu's ultra-high-performance large language model, with the lowest deployment and fine-tuning costs among the Wenxin series models."
|
||
},
|
||
"ernie-x1-32k": {
|
||
"description": "Possesses stronger abilities in understanding, planning, reflection, and evolution. As a more comprehensive deep thinking model, Wenxin X1 combines accuracy, creativity, and eloquence, excelling in areas such as Chinese knowledge Q&A, literary creation, document writing, daily conversation, logical reasoning, complex calculations, and tool invocation."
|
||
},
|
||
"ernie-x1-32k-preview": {
|
||
"description": "The ERNIE X1 model possesses stronger understanding, planning, reflection, and evolution capabilities. As a more comprehensive deep thinking model, ERNIE X1 excels in accuracy, creativity, and eloquence, particularly in Chinese knowledge Q&A, literary creation, document writing, daily conversation, logical reasoning, complex calculations, and tool invocation."
|
||
},
|
||
"ernie-x1-turbo-32k": {
|
||
"description": "The model performs better in terms of effectiveness and performance compared to ERNIE-X1-32K."
|
||
},
|
||
"fal-ai/bytedance/seedream/v4": {
|
||
"description": "Seedream 4.0 image generation model developed by ByteDance Seed team supports text and image inputs, providing a highly controllable and high-quality image generation experience. Generates images based on text prompts."
|
||
},
|
||
"fal-ai/flux-kontext/dev": {
|
||
"description": "FLUX.1 model focused on image editing tasks, supporting both text and image inputs."
|
||
},
|
||
"fal-ai/flux-pro/kontext": {
|
||
"description": "FLUX.1 Kontext [pro] can process text and reference images as inputs, seamlessly enabling targeted local edits and complex overall scene transformations."
|
||
},
|
||
"fal-ai/flux/krea": {
|
||
"description": "Flux Krea [dev] is an image generation model with an aesthetic preference, aiming to produce more realistic and natural images."
|
||
},
|
||
"fal-ai/flux/schnell": {
|
||
"description": "FLUX.1 [schnell] is a 12-billion-parameter image generation model focused on fast generation of high-quality images."
|
||
},
|
||
"fal-ai/hunyuan-image/v3": {
|
||
"description": "A powerful native multimodal image generation model"
|
||
},
|
||
"fal-ai/imagen4/preview": {
|
||
"description": "High-quality image generation model provided by Google."
|
||
},
|
||
"fal-ai/nano-banana": {
|
||
"description": "Nano Banana is Google's latest, fastest, and most efficient native multimodal model, allowing you to generate and edit images through conversation."
|
||
},
|
||
"fal-ai/qwen-image": {
|
||
"description": "Powerful raw image model from the Qwen team, featuring impressive Chinese text generation capabilities and diverse visual styles."
|
||
},
|
||
"fal-ai/qwen-image-edit": {
|
||
"description": "Professional image editing model released by the Qwen team, supporting semantic and appearance editing, precise editing of Chinese and English text, style transfer, object rotation, and other high-quality image edits."
|
||
},
|
||
"flux-1-schnell": {
|
||
"description": "Developed by Black Forest Labs, this 12-billion-parameter text-to-image model uses latent adversarial diffusion distillation technology to generate high-quality images within 1 to 4 steps. Its performance rivals closed-source alternatives and is released under the Apache-2.0 license, suitable for personal, research, and commercial use."
|
||
},
|
||
"flux-dev": {
|
||
"description": "FLUX.1 [dev] is an open-source weight and fine-tuned model for non-commercial applications. It maintains image quality and instruction-following capabilities close to the FLUX professional version while offering higher operational efficiency. Compared to standard models of the same size, it is more resource-efficient."
|
||
},
|
||
"flux-kontext-max": {
|
||
"description": "State-of-the-art contextual image generation and editing — combining text and images for precise, coherent results."
|
||
},
|
||
"flux-kontext-pro": {
|
||
"description": "State-of-the-art contextual image generation and editing — combining text and images for precise, coherent results."
|
||
},
|
||
"flux-merged": {
|
||
"description": "The FLUX.1-merged model combines the deep features explored during the development phase of “DEV” with the high-speed execution advantages represented by “Schnell.” This integration not only pushes the model's performance boundaries but also broadens its application scope."
|
||
},
|
||
"flux-pro": {
|
||
"description": "A top-tier commercial AI image generation model — delivering unparalleled image quality and a wide variety of outputs."
|
||
},
|
||
"flux-pro-1.1": {
|
||
"description": "Upgraded professional-grade AI image generation model — delivers outstanding image quality and precise adherence to prompts."
|
||
},
|
||
"flux-pro-1.1-ultra": {
|
||
"description": "Ultra-high-resolution AI image generation — supports up to 4-megapixel output, producing ultra-high-definition images in under 10 seconds."
|
||
},
|
||
"flux-schnell": {
|
||
"description": "FLUX.1 [schnell], currently the most advanced open-source few-step model, surpasses competitors and even powerful non-distilled models like Midjourney v6.0 and DALL·E 3 (HD). Finely tuned to retain the full output diversity from pretraining, FLUX.1 [schnell] significantly enhances visual quality, instruction compliance, size/aspect ratio variation, font handling, and output diversity compared to state-of-the-art models on the market, offering users a richer and more diverse creative image generation experience."
|
||
},
|
||
"flux.1-schnell": {
|
||
"description": "A 12-billion-parameter rectified flow transformer capable of generating images based on text descriptions."
|
||
},
|
||
"gemini-1.0-pro-001": {
|
||
"description": "Gemini 1.0 Pro 001 (Tuning) offers stable and tunable performance, making it an ideal choice for complex task solutions."
|
||
},
|
||
"gemini-1.0-pro-002": {
|
||
"description": "Gemini 1.0 Pro 002 (Tuning) provides excellent multimodal support, focusing on effective solutions for complex tasks."
|
||
},
|
||
"gemini-1.0-pro-latest": {
|
||
"description": "Gemini 1.0 Pro is Google's high-performance AI model, designed for extensive task scaling."
|
||
},
|
||
"gemini-1.5-flash-001": {
|
||
"description": "Gemini 1.5 Flash 001 is an efficient multimodal model that supports extensive application scaling."
|
||
},
|
||
"gemini-1.5-flash-002": {
|
||
"description": "Gemini 1.5 Flash 002 is an efficient multimodal model that supports a wide range of applications."
|
||
},
|
||
"gemini-1.5-flash-8b": {
|
||
"description": "Gemini 1.5 Flash 8B is an efficient multimodal model that supports a wide range of applications."
|
||
},
|
||
"gemini-1.5-flash-8b-exp-0924": {
|
||
"description": "Gemini 1.5 Flash 8B 0924 is the latest experimental model, showcasing significant performance improvements in both text and multimodal use cases."
|
||
},
|
||
"gemini-1.5-flash-8b-latest": {
|
||
"description": "Gemini 1.5 Flash 8B is a highly efficient multimodal model designed for scalable applications."
|
||
},
|
||
"gemini-1.5-flash-exp-0827": {
|
||
"description": "Gemini 1.5 Flash 0827 provides optimized multimodal processing capabilities, suitable for various complex task scenarios."
|
||
},
|
||
"gemini-1.5-flash-latest": {
|
||
"description": "Gemini 1.5 Flash is Google's latest multimodal AI model, featuring fast processing capabilities and supporting text, image, and video inputs, making it suitable for efficient scaling across various tasks."
|
||
},
|
||
"gemini-1.5-pro-001": {
|
||
"description": "Gemini 1.5 Pro 001 is a scalable multimodal AI solution that supports a wide range of complex tasks."
|
||
},
|
||
"gemini-1.5-pro-002": {
|
||
"description": "Gemini 1.5 Pro 002 is the latest production-ready model, delivering higher quality outputs, with notable enhancements in mathematics, long-context, and visual tasks."
|
||
},
|
||
"gemini-1.5-pro-exp-0801": {
|
||
"description": "Gemini 1.5 Pro 0801 offers excellent multimodal processing capabilities, providing greater flexibility for application development."
|
||
},
|
||
"gemini-1.5-pro-exp-0827": {
|
||
"description": "Gemini 1.5 Pro 0827 combines the latest optimization technologies for more efficient multimodal data processing."
|
||
},
|
||
"gemini-1.5-pro-latest": {
|
||
"description": "Gemini 1.5 Pro supports up to 2 million tokens, making it an ideal choice for medium-sized multimodal models, providing multifaceted support for complex tasks."
|
||
},
|
||
"gemini-2.0-flash": {
|
||
"description": "Gemini 2.0 Flash offers next-generation features and improvements, including exceptional speed, native tool usage, multimodal generation, and a 1M token context window."
|
||
},
|
||
"gemini-2.0-flash-001": {
|
||
"description": "Gemini 2.0 Flash offers next-generation features and improvements, including exceptional speed, native tool usage, multimodal generation, and a 1M token context window."
|
||
},
|
||
"gemini-2.0-flash-exp": {
|
||
"description": "Gemini 2.0 Flash model variant optimized for cost-effectiveness and low latency."
|
||
},
|
||
"gemini-2.0-flash-exp-image-generation": {
|
||
"description": "Gemini 2.0 Flash experimental model, supports image generation"
|
||
},
|
||
"gemini-2.0-flash-lite": {
|
||
"description": "Gemini 2.0 Flash is a variant of the model optimized for cost-effectiveness and low latency."
|
||
},
|
||
"gemini-2.0-flash-lite-001": {
|
||
"description": "Gemini 2.0 Flash is a variant of the model optimized for cost-effectiveness and low latency."
|
||
},
|
||
"gemini-2.0-flash-preview-image-generation": {
|
||
"description": "Gemini 2.0 Flash preview model, supports image generation"
|
||
},
|
||
"gemini-2.5-flash": {
|
||
"description": "Gemini 2.5 Flash is Google's most cost-effective model, offering comprehensive capabilities."
|
||
},
|
||
"gemini-2.5-flash-image": {
|
||
"description": "Nano Banana is Google's latest, fastest, and most efficient native multimodal model, allowing you to generate and edit images through conversation."
|
||
},
|
||
"gemini-2.5-flash-image-preview": {
|
||
"description": "Nano Banana is Google's latest, fastest, and most efficient native multimodal model, enabling you to generate and edit images through conversation."
|
||
},
|
||
"gemini-2.5-flash-image-preview:image": {
|
||
"description": "Nano Banana is Google's latest, fastest, and most efficient native multimodal model, enabling you to generate and edit images through conversation."
|
||
},
|
||
"gemini-2.5-flash-image:image": {
|
||
"description": "Nano Banana is Google's latest, fastest, and most efficient native multimodal model, allowing you to generate and edit images through conversation."
|
||
},
|
||
"gemini-2.5-flash-lite": {
|
||
"description": "Gemini 2.5 Flash-Lite is Google's smallest and most cost-effective model, designed for large-scale use."
|
||
},
|
||
"gemini-2.5-flash-lite-preview-06-17": {
|
||
"description": "Gemini 2.5 Flash-Lite Preview is Google's smallest and most cost-efficient model, designed for large-scale usage."
|
||
},
|
||
"gemini-2.5-flash-lite-preview-09-2025": {
|
||
"description": "Preview release (September 25th, 2025) of Gemini 2.5 Flash-Lite"
|
||
},
|
||
"gemini-2.5-flash-preview-04-17": {
|
||
"description": "Gemini 2.5 Flash Preview is Google's most cost-effective model, offering a comprehensive set of features."
|
||
},
|
||
"gemini-2.5-flash-preview-05-20": {
|
||
"description": "Gemini 2.5 Flash Preview is Google's most cost-effective model, offering comprehensive capabilities."
|
||
},
|
||
"gemini-2.5-flash-preview-09-2025": {
|
||
"description": "Preview release (September 25th, 2025) of Gemini 2.5 Flash"
|
||
},
|
||
"gemini-2.5-pro": {
|
||
"description": "Gemini 2.5 Pro is Google's most advanced reasoning model, capable of tackling complex problems in coding, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using long-context processing."
|
||
},
|
||
"gemini-2.5-pro-preview-03-25": {
|
||
"description": "Gemini 2.5 Pro Preview is Google's most advanced thinking model, capable of reasoning about complex problems in code, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using long-context analysis."
|
||
},
|
||
"gemini-2.5-pro-preview-05-06": {
|
||
"description": "Gemini 2.5 Pro Preview is Google's most advanced reasoning model, capable of reasoning about complex problems in code, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using long context."
|
||
},
|
||
"gemini-2.5-pro-preview-06-05": {
|
||
"description": "Gemini 2.5 Pro Preview is Google's most advanced cognitive model, capable of reasoning through complex problems in code, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using long-context understanding."
|
||
},
|
||
"gemini-flash-latest": {
|
||
"description": "Latest release of Gemini Flash"
|
||
},
|
||
"gemini-flash-lite-latest": {
|
||
"description": "Latest release of Gemini Flash-Lite"
|
||
},
|
||
"gemini-pro-latest": {
|
||
"description": "Latest release of Gemini Pro"
|
||
},
|
||
"gemma-7b-it": {
|
||
"description": "Gemma 7B is suitable for medium to small-scale task processing, offering cost-effectiveness."
|
||
},
|
||
"gemma2": {
|
||
"description": "Gemma 2 is an efficient model launched by Google, covering a variety of application scenarios from small applications to complex data processing."
|
||
},
|
||
"gemma2-9b-it": {
|
||
"description": "Gemma 2 9B is a model optimized for specific tasks and tool integration."
|
||
},
|
||
"gemma2:27b": {
|
||
"description": "Gemma 2 is an efficient model launched by Google, covering a variety of application scenarios from small applications to complex data processing."
|
||
},
|
||
"gemma2:2b": {
|
||
"description": "Gemma 2 is an efficient model launched by Google, covering a variety of application scenarios from small applications to complex data processing."
|
||
},
|
||
"generalv3": {
|
||
"description": "Spark Pro is a high-performance large language model optimized for professional fields, focusing on mathematics, programming, healthcare, education, and more, supporting online search and built-in plugins for weather, dates, etc. Its optimized model demonstrates excellent performance and efficiency in complex knowledge Q&A, language understanding, and high-level text creation, making it an ideal choice for professional application scenarios."
|
||
},
|
||
"generalv3.5": {
|
||
"description": "Spark3.5 Max is the most comprehensive version, supporting online search and numerous built-in plugins. Its fully optimized core capabilities, along with system role settings and function calling features, enable it to perform exceptionally well in various complex application scenarios."
|
||
},
|
||
"glm-4": {
|
||
"description": "GLM-4 is the old flagship version released in January 2024, currently replaced by the more powerful GLM-4-0520."
|
||
},
|
||
"glm-4-0520": {
|
||
"description": "GLM-4-0520 is the latest model version designed for highly complex and diverse tasks, demonstrating outstanding performance."
|
||
},
|
||
"glm-4-9b-chat": {
|
||
"description": "GLM-4-9B-Chat demonstrates high performance across semantics, mathematics, reasoning, coding, and knowledge. It also supports web browsing, code execution, custom tool invocation, and long-text reasoning. Supports 26 languages including Japanese, Korean, and German."
|
||
},
|
||
"glm-4-air": {
|
||
"description": "GLM-4-Air is a cost-effective version with performance close to GLM-4, offering fast speed at an affordable price."
|
||
},
|
||
"glm-4-air-250414": {
|
||
"description": "GLM-4-Air is a cost-effective version, with performance close to GLM-4, offering fast speed at an affordable price."
|
||
},
|
||
"glm-4-airx": {
|
||
"description": "GLM-4-AirX provides an efficient version of GLM-4-Air, with inference speeds up to 2.6 times faster."
|
||
},
|
||
"glm-4-alltools": {
|
||
"description": "GLM-4-AllTools is a multifunctional intelligent agent model optimized to support complex instruction planning and tool invocation, such as web browsing, code interpretation, and text generation, suitable for multitasking."
|
||
},
|
||
"glm-4-flash": {
|
||
"description": "GLM-4-Flash is the ideal choice for handling simple tasks, being the fastest and most cost-effective."
|
||
},
|
||
"glm-4-flash-250414": {
|
||
"description": "GLM-4-Flash is the ideal choice for handling simple tasks, being the fastest and free."
|
||
},
|
||
"glm-4-flashx": {
|
||
"description": "GLM-4-FlashX is an enhanced version of Flash, featuring ultra-fast inference speed."
|
||
},
|
||
"glm-4-long": {
|
||
"description": "GLM-4-Long supports ultra-long text inputs, suitable for memory-based tasks and large-scale document processing."
|
||
},
|
||
"glm-4-plus": {
|
||
"description": "GLM-4-Plus, as a high-intelligence flagship, possesses strong capabilities for processing long texts and complex tasks, with overall performance improvements."
|
||
},
|
||
"glm-4.1v-thinking-flash": {
|
||
"description": "The GLM-4.1V-Thinking series represents the most powerful vision-language models known at the 10B parameter scale, integrating state-of-the-art capabilities across various vision-language tasks such as video understanding, image question answering, academic problem solving, OCR text recognition, document and chart interpretation, GUI agents, front-end web coding, and grounding. Its performance in many tasks even surpasses that of Qwen2.5-VL-72B, which has over eight times the parameters. Leveraging advanced reinforcement learning techniques, the model masters Chain-of-Thought reasoning to improve answer accuracy and richness, significantly outperforming traditional non-thinking models in final results and interpretability."
|
||
},
|
||
"glm-4.1v-thinking-flashx": {
|
||
"description": "The GLM-4.1V-Thinking series represents the most powerful vision-language models known at the 10B parameter scale, integrating state-of-the-art capabilities across various vision-language tasks such as video understanding, image question answering, academic problem solving, OCR text recognition, document and chart interpretation, GUI agents, front-end web coding, and grounding. Its performance in many tasks even surpasses that of Qwen2.5-VL-72B, which has over eight times the parameters. Leveraging advanced reinforcement learning techniques, the model masters Chain-of-Thought reasoning to improve answer accuracy and richness, significantly outperforming traditional non-thinking models in final results and interpretability."
|
||
},
|
||
"glm-4.5": {
|
||
"description": "Zhipu's flagship model supports thinking mode switching, with comprehensive capabilities reaching the state-of-the-art level among open-source models, and a context length of up to 128K."
|
||
},
|
||
"glm-4.5-air": {
|
||
"description": "A lightweight version of GLM-4.5 balancing performance and cost-effectiveness, capable of flexibly switching hybrid thinking models."
|
||
},
|
||
"glm-4.5-airx": {
|
||
"description": "The ultra-fast version of GLM-4.5-Air, offering faster response speeds, designed for large-scale high-speed demands."
|
||
},
|
||
"glm-4.5-flash": {
|
||
"description": "The free version of GLM-4.5, delivering excellent performance in inference, coding, and agent tasks."
|
||
},
|
||
"glm-4.5-x": {
|
||
"description": "The high-speed version of GLM-4.5, combining strong performance with generation speeds up to 100 tokens per second."
|
||
},
|
||
"glm-4.5v": {
|
||
"description": "Zhipu's next-generation visual reasoning model is built on a Mixture-of-Experts (MoE) architecture. With 106B total parameters and 12B activated parameters, it achieves state-of-the-art performance among open-source multimodal models of similar scale across various benchmarks, supporting common tasks such as image, video, document understanding, and GUI-related tasks."
|
||
},
|
||
"glm-4.6": {
|
||
"description": "Zhipu's latest flagship model GLM-4.6 (355B) surpasses its predecessor comprehensively in advanced encoding, long text processing, reasoning, and agent capabilities, especially aligning with Claude Sonnet 4 in programming skills, making it a top-tier coding model in China."
|
||
},
|
||
"glm-4v": {
|
||
"description": "GLM-4V provides strong image understanding and reasoning capabilities, supporting various visual tasks."
|
||
},
|
||
"glm-4v-flash": {
|
||
"description": "GLM-4V-Flash focuses on efficient single image understanding, suitable for scenarios that require rapid image parsing, such as real-time image analysis or batch image processing."
|
||
},
|
||
"glm-4v-plus": {
|
||
"description": "GLM-4V-Plus has the ability to understand video content and multiple images, suitable for multimodal tasks."
|
||
},
|
||
"glm-4v-plus-0111": {
|
||
"description": "GLM-4V-Plus has the capability to understand video content and multiple images, making it suitable for multimodal tasks."
|
||
},
|
||
"glm-z1-air": {
|
||
"description": "Reasoning model: possesses strong reasoning capabilities, suitable for tasks requiring deep reasoning."
|
||
},
|
||
"glm-z1-airx": {
|
||
"description": "Ultra-fast reasoning: features extremely fast reasoning speed and powerful reasoning effects."
|
||
},
|
||
"glm-z1-flash": {
|
||
"description": "The GLM-Z1 series features powerful complex reasoning abilities, excelling in logic reasoning, mathematics, and programming."
|
||
},
|
||
"glm-z1-flashx": {
|
||
"description": "High speed and low cost: Flash enhanced version with ultra-fast inference speed and improved concurrency support."
|
||
},
|
||
"glm-zero-preview": {
|
||
"description": "GLM-Zero-Preview possesses strong complex reasoning abilities, excelling in logical reasoning, mathematics, programming, and other fields."
|
||
},
|
||
"google/gemini-2.0-flash": {
|
||
"description": "Gemini 2.0 Flash offers next-generation features and improvements, including exceptional speed, built-in tool usage, multimodal generation, and a 1 million token context window."
|
||
},
|
||
"google/gemini-2.0-flash-001": {
|
||
"description": "Gemini 2.0 Flash offers next-generation features and improvements, including exceptional speed, native tool usage, multimodal generation, and a 1M token context window."
|
||
},
|
||
"google/gemini-2.0-flash-exp:free": {
|
||
"description": "Gemini 2.0 Flash Experimental is Google's latest experimental multimodal AI model, showing a quality improvement compared to historical versions, especially in world knowledge, code, and long context."
|
||
},
|
||
"google/gemini-2.0-flash-lite": {
|
||
"description": "Gemini 2.0 Flash Lite provides next-generation features and improvements, including exceptional speed, built-in tool usage, multimodal generation, and a 1 million token context window."
|
||
},
|
||
"google/gemini-2.5-flash": {
|
||
"description": "Gemini 2.5 Flash is a thoughtful model delivering excellent comprehensive capabilities. It is designed to balance price and performance, supporting multimodal inputs and a 1 million token context window."
|
||
},
|
||
"google/gemini-2.5-flash-image-preview": {
|
||
"description": "Gemini 2.5 Flash experimental model, supporting image generation."
|
||
},
|
||
"google/gemini-2.5-flash-lite": {
|
||
"description": "Gemini 2.5 Flash-Lite is a balanced, low-latency model with configurable reasoning budget and tool connectivity (e.g., Google Search grounding and code execution). It supports multimodal inputs and offers a 1 million token context window."
|
||
},
|
||
"google/gemini-2.5-flash-preview": {
|
||
"description": "Gemini 2.5 Flash is Google's most advanced flagship model, designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in 'thinking' capabilities that allow it to provide responses with higher accuracy and detailed context handling.\n\nNote: This model has two variants: thinking and non-thinking. Output pricing varies significantly based on whether the thinking capability is activated. If you choose the standard variant (without the ':thinking' suffix), the model will explicitly avoid generating thinking tokens.\n\nTo leverage the thinking capability and receive thinking tokens, you must select the ':thinking' variant, which will incur higher thinking output pricing.\n\nAdditionally, Gemini 2.5 Flash can be configured via the 'maximum tokens for reasoning' parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning)."
|
||
},
|
||
"google/gemini-2.5-flash-preview:thinking": {
|
||
"description": "Gemini 2.5 Flash is Google's most advanced flagship model, designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in 'thinking' capabilities that allow it to provide responses with higher accuracy and detailed context handling.\n\nNote: This model has two variants: thinking and non-thinking. Output pricing varies significantly based on whether the thinking capability is activated. If you choose the standard variant (without the ':thinking' suffix), the model will explicitly avoid generating thinking tokens.\n\nTo leverage the thinking capability and receive thinking tokens, you must select the ':thinking' variant, which will incur higher thinking output pricing.\n\nAdditionally, Gemini 2.5 Flash can be configured via the 'maximum tokens for reasoning' parameter, as described in the documentation (https://openrouter.ai/docs/use-cases/reasoning-tokens#max-tokens-for-reasoning)."
|
||
},
|
||
"google/gemini-2.5-pro": {
|
||
"description": "Gemini 2.5 Pro is our most advanced reasoning Gemini model, capable of solving complex problems. It features a 2 million token context window and supports multimodal inputs including text, images, audio, video, and PDF documents."
},
"google/gemini-2.5-pro-preview": {
"description": "Gemini 2.5 Pro Preview is Google's most advanced thinking model, capable of reasoning through complex problems in code, mathematics, and STEM fields, as well as analyzing large datasets, codebases, and documents using extended context."
},
"google/gemini-embedding-001": {
"description": "A state-of-the-art embedding model delivering excellent performance on English, multilingual, and code tasks."
},
"google/gemini-flash-1.5": {
"description": "Gemini 1.5 Flash offers optimized multimodal processing capabilities, suitable for various complex task scenarios."
},
"google/gemini-pro-1.5": {
"description": "Gemini 1.5 Pro combines the latest optimization technologies to deliver more efficient multimodal data processing capabilities."
},
"google/gemma-2-27b": {
"description": "Gemma 2 is an efficient model launched by Google, covering a variety of application scenarios from small applications to complex data processing."
},
"google/gemma-2-27b-it": {
"description": "Gemma 2 continues the design philosophy of being lightweight and efficient."
},
"google/gemma-2-2b-it": {
"description": "Google's lightweight instruction-tuning model."
},
"google/gemma-2-9b": {
"description": "Gemma 2 is an efficient model launched by Google, covering a variety of application scenarios from small applications to complex data processing."
},
"google/gemma-2-9b-it": {
"description": "Gemma 2 is Google's lightweight open-source text model series."
},
"google/gemma-2-9b-it:free": {
"description": "Gemma 2 is Google's lightweight open-source text model series."
},
"google/gemma-2b-it": {
"description": "Gemma Instruct (2B) provides basic instruction processing capabilities, suitable for lightweight applications."
},
"google/gemma-3-12b-it": {
"description": "Gemma 3 12B is an open-source language model from Google that sets new standards in efficiency and performance."
},
"google/gemma-3-27b-it": {
"description": "Gemma 3 27B is an open-source language model from Google that sets new standards in efficiency and performance."
},
"google/text-embedding-005": {
"description": "An English-focused text embedding model optimized for code and English language tasks."
},
"google/text-multilingual-embedding-002": {
"description": "A multilingual text embedding model optimized for cross-lingual tasks, supporting multiple languages."
},
"gpt-3.5-turbo": {
"description": "GPT 3.5 Turbo is suitable for various text generation and understanding tasks. Currently points to gpt-3.5-turbo-0125."
},
"gpt-3.5-turbo-0125": {
"description": "GPT 3.5 Turbo is suitable for various text generation and understanding tasks. Currently points to gpt-3.5-turbo-0125."
},
"gpt-3.5-turbo-1106": {
"description": "GPT 3.5 Turbo is suitable for various text generation and understanding tasks. Currently points to gpt-3.5-turbo-0125."
},
"gpt-3.5-turbo-instruct": {
"description": "GPT 3.5 Turbo is suitable for various text generation and understanding tasks. Currently points to gpt-3.5-turbo-0125."
},
"gpt-35-turbo": {
"description": "GPT 3.5 Turbo is an efficient model provided by OpenAI, suitable for chat and text generation tasks, supporting parallel function calls."
},
"gpt-35-turbo-16k": {
"description": "GPT 3.5 Turbo 16k is a high-capacity text generation model suitable for complex tasks."
},
"gpt-4": {
"description": "GPT-4 offers a larger context window, capable of handling longer text inputs, making it suitable for scenarios that require extensive information integration and data analysis."
},
"gpt-4-0125-preview": {
"description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
},
"gpt-4-0613": {
"description": "GPT-4 offers a larger context window, capable of handling longer text inputs, making it suitable for scenarios that require extensive information integration and data analysis."
},
"gpt-4-1106-preview": {
"description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
},
"gpt-4-32k": {
"description": "GPT-4 offers a larger context window, capable of handling longer text inputs, making it suitable for scenarios that require extensive information integration and data analysis."
},
"gpt-4-32k-0613": {
"description": "GPT-4 offers a larger context window, capable of handling longer text inputs, making it suitable for scenarios that require extensive information integration and data analysis."
},
"gpt-4-turbo": {
"description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
},
"gpt-4-turbo-2024-04-09": {
"description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
},
"gpt-4-turbo-preview": {
"description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
},
"gpt-4-vision-preview": {
"description": "The latest GPT-4 Turbo model features visual capabilities. Now, visual requests can be made using JSON format and function calls. GPT-4 Turbo is an enhanced version that provides cost-effective support for multimodal tasks. It strikes a balance between accuracy and efficiency, making it suitable for applications requiring real-time interaction."
},
"gpt-4.1": {
"description": "GPT-4.1 is our flagship model for complex tasks. It excels at solving problems across various domains."
},
"gpt-4.1-mini": {
"description": "GPT-4.1 mini offers a balance of intelligence, speed, and cost, making it an attractive model for many use cases."
},
"gpt-4.1-nano": {
"description": "GPT-4.1 nano provides a balance of intelligence, speed, and cost, making it an appealing model for numerous applications."
},
"gpt-4.5-preview": {
"description": "GPT-4.5-preview is the latest general-purpose model, offering extensive world knowledge and an improved understanding of user intent. It excels at creative tasks and agent-style planning. The model's knowledge cutoff is October 2023."
},
"gpt-4o": {
"description": "ChatGPT-4o is a dynamic model that updates in real-time to stay current with the latest version. It combines powerful language understanding and generation capabilities, making it suitable for large-scale applications, including customer service, education, and technical support."
},
"gpt-4o-2024-05-13": {
"description": "ChatGPT-4o is a dynamic model that updates in real-time to stay current with the latest version. It combines powerful language understanding and generation capabilities, making it suitable for large-scale applications, including customer service, education, and technical support."
},
"gpt-4o-2024-08-06": {
"description": "ChatGPT-4o is a dynamic model that updates in real-time to stay current with the latest version. It combines powerful language understanding and generation capabilities, making it suitable for large-scale applications, including customer service, education, and technical support."
},
"gpt-4o-2024-11-20": {
"description": "ChatGPT-4o is a dynamic model that updates in real-time to maintain the latest version. It combines powerful language understanding and generation capabilities, making it suitable for large-scale applications including customer service, education, and technical support."
},
"gpt-4o-audio-preview": {
"description": "GPT-4o Audio Preview model, supporting audio input and output."
},
"gpt-4o-mini": {
"description": "GPT-4o mini is the latest model released by OpenAI after GPT-4 Omni, supporting both image and text input while outputting text. As their most advanced small model, it is significantly cheaper than other recent cutting-edge models, costing over 60% less than GPT-3.5 Turbo. It maintains state-of-the-art intelligence while offering remarkable cost-effectiveness. GPT-4o mini scored 82% on the MMLU test and currently ranks higher than GPT-4 in chat preferences."
},
"gpt-4o-mini-audio-preview": {
"description": "GPT-4o mini Audio model supports audio input and output."
},
"gpt-4o-mini-realtime-preview": {
"description": "GPT-4o-mini real-time version, supporting real-time audio and text input and output."
},
"gpt-4o-mini-search-preview": {
"description": "GPT-4o mini Search Preview is a model specifically trained to understand and execute web search queries, using the Chat Completions API. In addition to token fees, web search queries incur charges per tool invocation."
},
"gpt-4o-mini-transcribe": {
"description": "GPT-4o Mini Transcribe is a speech-to-text model that uses GPT-4o to transcribe audio. Compared to the original Whisper model, it improves word error rate, language recognition, and accuracy. Use it for more precise transcriptions."
},
"gpt-4o-mini-tts": {
"description": "GPT-4o mini TTS is a text-to-speech model based on GPT-4o mini, providing high-quality speech generation at a lower cost."
},
"gpt-4o-realtime-preview": {
"description": "GPT-4o real-time version, supporting real-time audio and text input and output."
},
"gpt-4o-realtime-preview-2024-10-01": {
"description": "GPT-4o real-time version, supporting real-time audio and text input and output."
},
"gpt-4o-realtime-preview-2025-06-03": {
"description": "GPT-4o real-time version supporting real-time audio and text input and output."
},
"gpt-4o-search-preview": {
"description": "GPT-4o Search Preview is a model specifically trained to understand and execute web search queries, using the Chat Completions API. In addition to token fees, web search queries incur charges per tool invocation."
},
"gpt-4o-transcribe": {
"description": "GPT-4o Transcribe is a speech-to-text model that uses GPT-4o to transcribe audio. Compared to the original Whisper model, it improves word error rate, language recognition, and accuracy. Use it for more precise transcriptions."
},
"gpt-5": {
"description": "The best model for cross-domain coding and agent tasks. GPT-5 achieves breakthroughs in accuracy, speed, reasoning, context recognition, structured thinking, and problem-solving."
},
"gpt-5-chat-latest": {
"description": "The GPT-5 model used in ChatGPT. Combines powerful language understanding and generation capabilities, ideal for conversational interaction applications."
},
"gpt-5-codex": {
"description": "GPT-5 Codex is a GPT-5 variant optimized for agent coding tasks in Codex or similar environments."
},
"gpt-5-mini": {
"description": "A faster, more cost-effective version of GPT-5, suitable for well-defined tasks. Provides quicker response times while maintaining high-quality output."
},
"gpt-5-nano": {
"description": "The fastest and most cost-efficient version of GPT-5. Perfectly suited for applications requiring rapid responses and cost sensitivity."
},
"gpt-5-pro": {
"description": "GPT-5 Pro leverages greater computational power for deeper reasoning and consistently delivers improved answers."
},
"gpt-audio": {
"description": "GPT Audio is a general-purpose chat model designed for audio input and output, supporting audio I/O in the Chat Completions API."
},
"gpt-image-1": {
"description": "ChatGPT native multimodal image generation model."
},
"gpt-image-1-mini": {
"description": "A more cost-effective version of GPT Image 1, natively supporting both text and image inputs with image generation output."
},
"gpt-oss-120b": {
"description": "Access to this model requires an application. GPT-OSS-120B is an open-source large-scale language model released by OpenAI, known for its powerful text generation capabilities."
},
"gpt-oss-20b": {
"description": "Access to this model requires an application. GPT-OSS-20B is an open-source mid-sized language model from OpenAI, offering efficient text generation."
},
"gpt-oss:120b": {
"description": "GPT-OSS 120B is a large open-source language model released by OpenAI, employing MXFP4 quantization technology as a flagship model. It requires multi-GPU or high-performance workstation environments to operate and delivers outstanding performance in complex reasoning, code generation, and multilingual processing, supporting advanced function calls and tool integration."
},
"gpt-oss:20b": {
"description": "GPT-OSS 20B is an open-source large language model released by OpenAI, utilizing MXFP4 quantization technology, suitable for running on high-end consumer GPUs or Apple Silicon Macs. This model excels in dialogue generation, code writing, and reasoning tasks, supporting function calls and tool usage."
},
"gpt-realtime": {
"description": "A general-purpose real-time model supporting real-time text and audio input/output, as well as image input."
},
"grok-2-image-1212": {
"description": "Our latest image generation model can create vivid and realistic images based on text prompts. It performs excellently in image generation for marketing, social media, and entertainment."
},
"grok-2-vision-1212": {
"description": "This model has improved in accuracy, instruction adherence, and multilingual capabilities."
},
"grok-3": {
"description": "A flagship model skilled in data extraction, programming, and text summarization for enterprise applications, with deep knowledge in finance, healthcare, law, and science."
},
"grok-3-mini": {
"description": "A lightweight model that thinks before responding. It runs fast and intelligently, suitable for logical tasks that do not require deep domain knowledge, and can provide raw thought trajectories."
},
"grok-4": {
"description": "Our latest and most powerful flagship model, excelling in natural language processing, mathematical computation, and reasoning — a perfect all-rounder."
},
"grok-4-0709": {
"description": "xAI's Grok 4, featuring strong reasoning capabilities."
},
"grok-4-fast-non-reasoning": {
"description": "We are excited to release Grok 4 Fast, our latest advancement in cost-effective reasoning models."
},
"grok-4-fast-reasoning": {
"description": "We are excited to release Grok 4 Fast, our latest advancement in cost-effective reasoning models."
},
"grok-code-fast-1": {
"description": "We are excited to introduce grok-code-fast-1, a fast and cost-effective inference model that excels in agent coding."
},
"groq/compound": {
"description": "Compound is a composite AI system supported by multiple openly available models already supported in GroqCloud, capable of intelligently and selectively using tools to answer user queries."
},
"groq/compound-mini": {
"description": "Compound-mini is a composite AI system supported by publicly available models already supported in GroqCloud, capable of intelligently and selectively using tools to answer user queries."
},
"gryphe/mythomax-l2-13b": {
"description": "MythoMax l2 13B is a language model that combines creativity and intelligence by merging multiple top models."
},
"hunyuan-a13b": {
"description": "Hunyuan's first hybrid reasoning model, an upgraded version of hunyuan-standard-256K, with a total of 80 billion parameters and 13 billion activated parameters. The default mode is slow thinking, supporting fast and slow thinking mode switching via parameters or instructions, with the switch implemented by adding 'query' prefix or 'no_think'. Overall capabilities are comprehensively improved compared to the previous generation, especially in mathematics, science, long text comprehension, and agent abilities."
},
"hunyuan-code": {
"description": "The latest code generation model from Hunyuan, trained on a base model with 200B high-quality code data, iteratively trained for six months with high-quality SFT data, increasing the context window length to 8K. It ranks among the top in automatic evaluation metrics for code generation across five major programming languages, and performs in the first tier for comprehensive human quality assessments across ten aspects of coding tasks."
},
"hunyuan-functioncall": {
"description": "The latest MOE architecture FunctionCall model from Hunyuan, trained on high-quality FunctionCall data, with a context window of 32K, leading in multiple dimensions of evaluation metrics."
},
"hunyuan-large": {
"description": "The Hunyuan-large model has a total parameter count of approximately 389B, with about 52B active parameters, making it the largest and most effective open-source MoE model in the industry based on the Transformer architecture."
},
"hunyuan-large-longcontext": {
"description": "Specializes in handling long text tasks such as document summarization and question answering, while also capable of general text generation tasks. It excels in analyzing and generating long texts, effectively addressing complex and detailed long-form content processing needs."
},
"hunyuan-large-vision": {
"description": "This model is designed for image-text understanding scenarios. It is a vision-language large model based on Hunyuan Large training, supporting multi-image plus text input at any resolution to generate textual content. It focuses on image-text understanding tasks and shows significant improvements in multilingual image-text comprehension."
},
"hunyuan-lite": {
"description": "Upgraded to a MOE structure with a context window of 256k, leading many open-source models in various NLP, coding, mathematics, and industry benchmarks."
},
"hunyuan-lite-vision": {
"description": "The latest 7B multimodal model from Hunyuan, with a context window of 32K, supports multimodal dialogue in both Chinese and English scenarios, image object recognition, document table understanding, and multimodal mathematics, outperforming 7B competing models across multiple evaluation dimensions."
},
"hunyuan-pro": {
"description": "A trillion-parameter scale MOE-32K long text model. Achieves absolute leading levels across various benchmarks, capable of handling complex instructions and reasoning, with advanced mathematical abilities, supporting function calls, and optimized for applications in multilingual translation, finance, law, and healthcare."
},
"hunyuan-role": {
"description": "The latest role-playing model from Hunyuan, fine-tuned and trained by Hunyuan's official team, based on the Hunyuan model combined with role-playing scenario datasets for enhanced foundational performance in role-playing contexts."
},
"hunyuan-standard": {
"description": "Utilizes a superior routing strategy while alleviating issues of load balancing and expert convergence. For long texts, the needle-in-a-haystack metric reaches 99.9%. MOE-32K offers a relatively higher cost-performance ratio, balancing effectiveness and price while enabling processing of long text inputs."
},
"hunyuan-standard-256K": {
"description": "Utilizes a superior routing strategy while alleviating issues of load balancing and expert convergence. For long texts, the needle-in-a-haystack metric reaches 99.9%. MOE-256K further breaks through in length and effectiveness, greatly expanding the input length capacity."
},
"hunyuan-standard-vision": {
"description": "The latest multimodal model from Hunyuan, supporting multilingual responses with balanced capabilities in both Chinese and English."
},
"hunyuan-t1-20250321": {
"description": "Comprehensively builds model capabilities in both arts and sciences, with strong long-text information capture ability. Supports reasoning and answering various scientific questions, including mathematics, logic, science, and code, of varying difficulty."
},
"hunyuan-t1-20250403": {
"description": "Enhance project-level code generation capabilities; improve the quality of text generation and writing; enhance multi-turn topic understanding, ToB instruction compliance, and word comprehension; optimize issues with mixed traditional and simplified Chinese as well as mixed Chinese and English output."
},
"hunyuan-t1-20250529": {
"description": "Optimized for text creation and essay writing, with enhanced abilities in frontend coding, mathematics, logical reasoning, and improved instruction-following capabilities."
},
"hunyuan-t1-20250711": {
"description": "Significantly improves high-difficulty mathematics, logic, and coding capabilities, optimizes model output stability, and enhances long-text processing ability."
},
"hunyuan-t1-latest": {
"description": "Significantly enhances the main model's slow-thinking capabilities in advanced mathematics, complex reasoning, difficult coding, instruction adherence, and text creation quality."
},
"hunyuan-t1-vision-20250619": {
"description": "The latest Hunyuan t1-vision multimodal deep thinking model supports native long Chain-of-Thought reasoning across modalities, comprehensively improving over the previous default version."
},
"hunyuan-t1-vision-20250916": {
"description": "The latest Hunyuan t1-vision model excels in visual deep reasoning. Compared to the previous version, it offers significant enhancements in general image-text Q&A, visual localization, OCR, chart interpretation, problem-solving from photos, and image-based creative tasks, with notable improvements in English and low-resource language capabilities."
},
"hunyuan-turbo": {
"description": "The preview version of the next-generation Hunyuan large language model, featuring a brand-new mixed expert model (MoE) structure, which offers faster inference efficiency and stronger performance compared to Hunyuan Pro."
},
"hunyuan-turbo-20241223": {
"description": "This version optimizes: data instruction scaling, significantly enhancing the model's generalization capabilities; greatly improving mathematical, coding, and logical reasoning abilities; optimizing text understanding and word comprehension capabilities; enhancing the quality of content generation in text creation."
},
"hunyuan-turbo-latest": {
"description": "General experience optimization, including NLP understanding, text creation, casual conversation, knowledge Q&A, translation, and domain-specific tasks; enhanced personification and emotional intelligence of the model; improved the model's ability to clarify when intentions are ambiguous; enhanced handling of word parsing-related questions; improved the quality and interactivity of creative outputs; enhanced multi-turn experience."
},
"hunyuan-turbo-vision": {
"description": "The next-generation flagship visual language model from Hunyuan, utilizing a new mixed expert model (MoE) structure, with comprehensive improvements in basic recognition, content creation, knowledge Q&A, and analytical reasoning capabilities compared to the previous generation model."
},
"hunyuan-turbos-20250313": {
"description": "Standardize the style of mathematical problem-solving steps and strengthen multi-turn math Q&A. Optimize text creation by refining response style, removing AI-like tone, and adding literary flair."
},
"hunyuan-turbos-20250416": {
"description": "Upgrade the pre-training foundation to strengthen instruction understanding and compliance; enhance STEM abilities in mathematics, coding, logic, and science during alignment; improve humanities capabilities such as creative writing quality, text comprehension, translation accuracy, and knowledge Q&A; boost agent capabilities across various domains, with a focus on multi-turn dialogue understanding."
},
"hunyuan-turbos-20250604": {
"description": "Upgraded pretraining foundation with improved writing and reading comprehension skills, significantly enhanced coding and STEM abilities, and continuous improvements in following complex instructions."
},
"hunyuan-turbos-20250926": {
"description": "Pre-training base data quality upgrade. Optimized post-training phase strategies to continuously enhance Agent capabilities, English and minor language proficiency, instruction compliance, coding, and scientific reasoning."
},
"hunyuan-turbos-latest": {
"description": "The latest version of hunyuan-TurboS, the flagship model of Hunyuan, features enhanced reasoning capabilities and improved user experience."
},
"hunyuan-turbos-longtext-128k-20250325": {
"description": "Specializes in handling long text tasks such as document summarization and question answering, while also capable of general text generation tasks. It excels in analyzing and generating long texts, effectively addressing complex and detailed long-form content processing needs."
},
"hunyuan-turbos-role-plus": {
"description": "The latest Hunyuan role-playing model, officially fine-tuned and trained by Hunyuan. It is further trained on role-playing scenario datasets based on the Hunyuan model, delivering better foundational performance in role-playing contexts."
},
"hunyuan-turbos-vision": {
"description": "This model is designed for image-text understanding scenarios and is based on Hunyuan's latest turbos architecture. It is a next-generation flagship vision-language model focusing on image-text understanding tasks, including image-based entity recognition, knowledge Q&A, copywriting, and photo-based problem solving, with comprehensive improvements over the previous generation."
},
"hunyuan-turbos-vision-20250619": {
"description": "The latest Hunyuan turbos-vision flagship vision-language model offers comprehensive improvements over the previous default version in image-text understanding tasks, including image-based entity recognition, knowledge Q&A, copywriting, and photo-based problem solving."
},
"hunyuan-vision": {
"description": "The latest multimodal model from Hunyuan, supporting image + text input to generate textual content."
},
"image-01": {
"description": "A brand-new image generation model with delicate visual performance, supporting text-to-image and image-to-image generation."
},
"image-01-live": {
"description": "An image generation model with delicate visual performance, supporting text-to-image generation and style setting."
},
"imagen-4.0-fast-generate-001": {
"description": "Imagen 4th-generation text-to-image model, Fast version"
},
"imagen-4.0-generate-001": {
"description": "Imagen 4th-generation text-to-image model series"
},
"imagen-4.0-generate-preview-06-06": {
"description": "Imagen 4th generation text-to-image model series"
},
"imagen-4.0-ultra-generate-001": {
"description": "Imagen 4th-generation text-to-image model, Ultra version"
},
"imagen-4.0-ultra-generate-preview-06-06": {
"description": "Imagen 4th generation text-to-image model series Ultra version"
},
"inception/mercury-coder-small": {
"description": "Mercury Coder Small is ideal for code generation, debugging, and refactoring tasks, offering minimal latency."
},
"inclusionAI/Ling-1T": {
"description": "Ling-1T is the first flagship non-thinking model in the 'Ling 2.0' series, featuring 1 trillion total parameters and approximately 50 billion active parameters per token. Built on the Ling 2.0 architecture, Ling-1T aims to push the boundaries of efficient reasoning and scalable cognition. Ling-1T-base is trained on over 20 trillion high-quality, reasoning-intensive tokens."
},
"inclusionAI/Ling-flash-2.0": {
"description": "Ling-flash-2.0 is the third model in the Ling 2.0 architecture series released by Ant Group's Bailing team. It is a mixture-of-experts (MoE) model with a total of 100 billion parameters, but activates only 6.1 billion parameters per token (4.8 billion non-embedding). As a lightweight configuration model, Ling-flash-2.0 demonstrates performance comparable to or surpassing 40-billion-parameter dense models and larger MoE models across multiple authoritative benchmarks. The model aims to explore efficient pathways under the consensus that \"large models equal large parameters\" through extreme architectural design and training strategies."
},
"inclusionAI/Ling-mini-2.0": {
"description": "Ling-mini-2.0 is a small-sized, high-performance large language model based on the MoE architecture. It has 16 billion total parameters but activates only 1.4 billion per token (789 million non-embedding), achieving extremely high generation speed. Thanks to the efficient MoE design and large-scale high-quality training data, despite activating only 1.4 billion parameters, Ling-mini-2.0 still delivers top-tier performance comparable to dense LLMs under 10 billion parameters and larger MoE models on downstream tasks."
},
"inclusionAI/Ring-1T": {
"description": "Ring-1T is a trillion-parameter open-source cognitive model released by the Bailing team. It is trained on the Ling 2.0 architecture and the Ling-1T-base model, with 1 trillion total parameters and 50 billion active parameters. It supports context windows up to 128K and is optimized through large-scale verifiable reward reinforcement learning."
},
"inclusionAI/Ring-flash-2.0": {
"description": "Ring-flash-2.0 is a high-performance reasoning model deeply optimized based on Ling-flash-2.0-base. It employs a mixture-of-experts (MoE) architecture with a total of 100 billion parameters but activates only 6.1 billion parameters per inference. The model uses the proprietary icepop algorithm to solve the instability issues of MoE large models during reinforcement learning (RL) training, enabling continuous improvement of complex reasoning capabilities over long training cycles. Ring-flash-2.0 has achieved significant breakthroughs in challenging benchmarks such as math competitions, code generation, and logical reasoning. Its performance not only surpasses top dense models under 40 billion parameters but also rivals larger open-source MoE models and closed-source high-performance reasoning models. Although focused on complex reasoning, it also performs well in creative writing tasks. Additionally, thanks to its efficient architecture, Ring-flash-2.0 delivers strong performance with high-speed inference, significantly reducing deployment costs for reasoning models in high-concurrency scenarios."
},
"internlm/internlm2_5-7b-chat": {
"description": "InternLM2.5 offers intelligent dialogue solutions across multiple scenarios."
},
"internlm2.5-latest": {
"description": "Our latest model series, featuring exceptional reasoning performance, supporting a context length of 1M, and enhanced instruction following and tool invocation capabilities."
},
"internlm3-latest": {
"description": "Our latest model series boasts exceptional inference performance, leading the pack among open-source models of similar scale. It defaults to our most recently released InternLM3 series models."
},
"internvl2.5-latest": {
"description": "The InternVL2.5 version we continue to maintain, offering excellent and stable performance. It defaults to our latest released InternVL2.5 series model, currently pointing to internvl2.5-78b."
},
"internvl3-latest": {
"description": "Our latest released multimodal large model, featuring enhanced image-text understanding capabilities and long-sequence image comprehension, performs on par with top proprietary models. It defaults to our latest released InternVL series model, currently pointing to internvl3-78b."
},
"irag-1.0": {
"description": "Baidu's self-developed iRAG (image-based Retrieval-Augmented Generation) technology combines Baidu Search's hundreds of millions of image resources with powerful foundational model capabilities to generate ultra-realistic images. The overall effect far surpasses native text-to-image systems, eliminating the AI-generated feel while maintaining low cost. iRAG features hallucination-free, ultra-realistic, and instant retrieval characteristics."
},
"jamba-large": {
"description": "Our most powerful and advanced model, designed for handling complex enterprise-level tasks with exceptional performance."
},
"jamba-mini": {
"description": "The most efficient model in its class, balancing speed and quality while maintaining a smaller size."
},
"jina-deepsearch-v1": {
"description": "DeepSearch combines web search, reading, and reasoning for comprehensive investigations. You can think of it as an agent that takes on your research tasks—it conducts extensive searches and iterates multiple times before providing answers. This process involves ongoing research, reasoning, and problem-solving from various angles. This fundamentally differs from standard large models that generate answers directly from pre-trained data and traditional RAG systems that rely on one-time surface searches."
},
"kimi-k2": {
"description": "Kimi-K2 is a MoE architecture base model launched by Moonshot AI with exceptional coding and agent capabilities, featuring 1 trillion total parameters and 32 billion activated parameters. In benchmark tests across general knowledge reasoning, programming, mathematics, and agent tasks, the K2 model outperforms other mainstream open-source models."
},
"kimi-k2-0711-preview": {
"description": "kimi-k2 is a MoE architecture base model with powerful coding and agent capabilities, totaling 1 trillion parameters with 32 billion active parameters. In benchmark tests across general knowledge reasoning, programming, mathematics, and agent tasks, the K2 model outperforms other mainstream open-source models."
},
"kimi-k2-0905-preview": {
"description": "The kimi-k2-0905-preview model has a context length of 256k, featuring stronger Agentic Coding capabilities, more outstanding aesthetics and practicality of frontend code, and better context understanding."
},
"kimi-k2-instruct": {
"description": "Kimi K2 Instruct is a large language model developed by Moonshot AI, featuring ultra-long context processing capabilities."
},
"kimi-k2-turbo-preview": {
"description": "Kimi-K2 is a Mixture-of-Experts (MoE) foundation model with exceptional coding and agent capabilities, featuring 1T total parameters and 32B activated parameters. In benchmark evaluations across core categories — general knowledge reasoning, programming, mathematics, and agent tasks — the K2 model outperforms other leading open-source models."
},
"kimi-k2:1t": {
"description": "Kimi K2 is a large-scale Mixture of Experts (MoE) language model developed by Moon's Dark Side AI, featuring a total of 1 trillion parameters and 32 billion activated parameters per forward pass. It is optimized for agent capabilities, including advanced tool usage, reasoning, and code synthesis."
},
"kimi-latest": {
"description": "The Kimi Smart Assistant product uses the latest Kimi large model, which may include features that are not yet stable. It supports image understanding and will automatically select the 8k/32k/128k model as the billing model based on the length of the request context."
},
"kimi-thinking-preview": {
"description": "kimi-thinking-preview is a multimodal thinking model provided by Dark Side of the Moon, featuring multimodal and general reasoning abilities. It excels at deep reasoning to help solve more complex and challenging problems."
},
"learnlm-1.5-pro-experimental": {
"description": "LearnLM is an experimental, task-specific language model trained to align with learning science principles, capable of following systematic instructions in teaching and learning scenarios, acting as an expert tutor, among other roles."
},
"learnlm-2.0-flash-experimental": {
"description": "LearnLM is an experimental, task-specific language model trained to align with the principles of learning science, capable of following systematic instructions in teaching and learning scenarios, acting as an expert tutor, among other roles."
},
"lite": {
"description": "Spark Lite is a lightweight large language model with extremely low latency and efficient processing capabilities, completely free and open, supporting real-time online search functionality. Its quick response feature makes it excel in inference applications and model fine-tuning on low-power devices, providing users with excellent cost-effectiveness and intelligent experiences, particularly in knowledge Q&A, content generation, and search scenarios."
},
"llama-3.1-70b-versatile": {
"description": "Llama 3.1 70B provides enhanced AI reasoning capabilities, suitable for complex applications, supporting extensive computational processing while ensuring efficiency and accuracy."
},
"llama-3.1-8b-instant": {
"description": "Llama 3.1 8B is a high-performance model that offers rapid text generation capabilities, making it ideal for applications requiring large-scale efficiency and cost-effectiveness."
},
"llama-3.1-instruct": {
"description": "The Llama 3.1 instruction-tuned model is optimized for conversational scenarios, outperforming many existing open-source chat models on common industry benchmarks."
},
"llama-3.2-11b-vision-instruct": {
"description": "Excellent image reasoning capabilities on high-resolution images, suitable for visual understanding applications."
},
"llama-3.2-11b-vision-preview": {
"description": "Llama 3.2 is designed to handle tasks that combine visual and textual data. It excels in tasks such as image description and visual question answering, bridging the gap between language generation and visual reasoning."
},
"llama-3.2-90b-vision-instruct": {
"description": "Advanced image reasoning capabilities suitable for visual understanding agent applications."
},
"llama-3.2-90b-vision-preview": {
"description": "Llama 3.2 is designed to handle tasks that combine visual and textual data. It excels in tasks such as image description and visual question answering, bridging the gap between language generation and visual reasoning."
},
"llama-3.2-vision-instruct": {
"description": "The Llama 3.2-Vision instruction-tuned model is optimized for visual recognition, image reasoning, image captioning, and answering general questions related to images."
},
"llama-3.3-70b": {
"description": "Llama 3.3 70B: A mid-to-large scale Llama model that balances reasoning power and throughput."
},
"llama-3.3-70b-versatile": {
"description": "Meta Llama 3.3 is a multilingual large language model (LLM) with 70 billion parameters (text input/text output), featuring pre-training and instruction-tuning. The instruction-tuned pure text model of Llama 3.3 is optimized for multilingual conversational use cases and outperforms many available open-source and closed chat models on common industry benchmarks."
},
"llama-3.3-instruct": {
"description": "The Llama 3.3 instruction-tuned model is optimized for conversational scenarios, outperforming many existing open-source chat models on common industry benchmarks."
},
"llama-4-scout-17b-16e-instruct": {
"description": "Llama 4 Scout: A high-performance Llama model optimized for scenarios requiring high throughput and low latency."
},
"llama3-70b-8192": {
"description": "Meta Llama 3 70B provides unparalleled complexity handling capabilities, tailored for high-demand projects."
},
"llama3-8b-8192": {
"description": "Meta Llama 3 8B delivers high-quality reasoning performance, suitable for diverse application needs."
},
"llama3-groq-70b-8192-tool-use-preview": {
"description": "Llama 3 Groq 70B Tool Use offers powerful tool invocation capabilities, supporting efficient processing of complex tasks."
},
"llama3-groq-8b-8192-tool-use-preview": {
"description": "Llama 3 Groq 8B Tool Use is a model optimized for efficient tool usage, supporting fast parallel computation."
},
"llama3.1": {
"description": "Llama 3.1 is a leading model launched by Meta, supporting up to 405B parameters, applicable in complex dialogues, multilingual translation, and data analysis."
},
"llama3.1-8b": {
"description": "Llama 3.1 8B: A lightweight, low-latency variant of Llama, well-suited for real-time inference and interactive applications."
},
"llama3.1:405b": {
"description": "Llama 3.1 is a leading model launched by Meta, supporting up to 405B parameters, applicable in complex dialogues, multilingual translation, and data analysis."
},
"llama3.1:70b": {
"description": "Llama 3.1 is a leading model launched by Meta, supporting up to 405B parameters, applicable in complex dialogues, multilingual translation, and data analysis."
},
"llava": {
"description": "LLaVA is a multimodal model that combines a visual encoder with Vicuna for powerful visual and language understanding."
},
"llava-v1.5-7b-4096-preview": {
"description": "LLaVA 1.5 7B offers integrated visual processing capabilities, generating complex outputs from visual information inputs."
},
"llava:13b": {
"description": "LLaVA is a multimodal model that combines a visual encoder with Vicuna for powerful visual and language understanding."
},
"llava:34b": {
"description": "LLaVA is a multimodal model that combines a visual encoder with Vicuna for powerful visual and language understanding."
},
"magistral-medium-latest": {
"description": "Magistral Medium 1.2 is a cutting-edge inference model with visual support, released by Mistral AI in September 2025."
},
"magistral-small-2509": {
"description": "Magistral Small 1.2 is an open-source compact inference model with visual support, released by Mistral AI in September 2025."
},
"mathstral": {
"description": "MathΣtral is designed for scientific research and mathematical reasoning, providing effective computational capabilities and result interpretation."
},
"max-32k": {
"description": "Spark Max 32K is configured with large context processing capabilities, enhanced contextual understanding, and logical reasoning abilities, supporting text input of 32K tokens, suitable for long document reading, private knowledge Q&A, and other scenarios."
},
"megrez-3b-instruct": {
"description": "Megrez 3B Instruct is a compact and efficient model developed by Wuwen Xinqiong."
},
"meta-llama-3-70b-instruct": {
"description": "A powerful 70-billion parameter model excelling in reasoning, coding, and broad language applications."
},
"meta-llama-3-8b-instruct": {
"description": "A versatile 8-billion parameter model optimized for dialogue and text generation tasks."
},
"meta-llama-3.1-405b-instruct": {
"description": "The Llama 3.1 instruction-tuned text-only models are optimized for multilingual dialogue use cases and outperform many of the available open-source and closed chat models on common industry benchmarks."
},
"meta-llama-3.1-70b-instruct": {
"description": "The Llama 3.1 instruction-tuned text-only models are optimized for multilingual dialogue use cases and outperform many of the available open-source and closed chat models on common industry benchmarks."
},
"meta-llama-3.1-8b-instruct": {
"description": "The Llama 3.1 instruction-tuned text-only models are optimized for multilingual dialogue use cases and outperform many of the available open-source and closed chat models on common industry benchmarks."
},
"meta-llama/Llama-2-13b-chat-hf": {
"description": "LLaMA-2 Chat (13B) offers excellent language processing capabilities and outstanding interactive experiences."
},
"meta-llama/Llama-2-70b-hf": {
"description": "LLaMA-2 provides excellent language processing capabilities and outstanding interactive experiences."
},
"meta-llama/Llama-3-70b-chat-hf": {
"description": "LLaMA-3 Chat (70B) is a powerful chat model that supports complex conversational needs."
},
"meta-llama/Llama-3-8b-chat-hf": {
"description": "LLaMA-3 Chat (8B) provides multilingual support, covering a rich array of domain knowledge."
},
"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo": {
"description": "LLaMA 3.2 is designed for tasks involving both visual and textual data. It excels in tasks like image description and visual question answering, bridging the gap between language generation and visual reasoning."
},
"meta-llama/Llama-3.2-3B-Instruct-Turbo": {
"description": "LLaMA 3.2 is designed for tasks involving both visual and textual data. It excels in tasks like image description and visual question answering, bridging the gap between language generation and visual reasoning."
},
"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo": {
"description": "LLaMA 3.2 is designed for tasks involving both visual and textual data. It excels in tasks like image description and visual question answering, bridging the gap between language generation and visual reasoning."
},
"meta-llama/Llama-3.3-70B-Instruct-Turbo": {
"description": "Meta Llama 3.3 is a multilingual large language model (LLM) that is a pre-trained and instruction-tuned generative model within the 70B (text input/text output) framework. The instruction-tuned pure text model is optimized for multilingual dialogue use cases and outperforms many available open-source and closed chat models on common industry benchmarks."
},
"meta-llama/Llama-Vision-Free": {
"description": "LLaMA 3.2 is designed for tasks involving both visual and textual data. It excels in tasks like image description and visual question answering, bridging the gap between language generation and visual reasoning."
},
"meta-llama/Meta-Llama-3-70B-Instruct-Lite": {
"description": "Llama 3 70B Instruct Lite is suitable for environments requiring high performance and low latency."
},
"meta-llama/Meta-Llama-3-70B-Instruct-Turbo": {
"description": "Llama 3 70B Instruct Turbo offers exceptional language understanding and generation capabilities, suitable for the most demanding computational tasks."
},
"meta-llama/Meta-Llama-3-8B-Instruct-Lite": {
"description": "Llama 3 8B Instruct Lite is designed for resource-constrained environments, providing excellent balanced performance."
},
"meta-llama/Meta-Llama-3-8B-Instruct-Turbo": {
"description": "Llama 3 8B Instruct Turbo is a high-performance large language model, supporting a wide range of application scenarios."
},
"meta-llama/Meta-Llama-3.1-405B-Instruct": {
"description": "LLaMA 3.1 405B is a powerful model for pre-training and instruction tuning."
},
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": {
"description": "The 405B Llama 3.1 Turbo model provides massive context support for big data processing, excelling in large-scale AI applications."
},
"meta-llama/Meta-Llama-3.1-70B": {
"description": "Llama 3.1 is a leading model launched by Meta, supporting up to 405B parameters, applicable in complex conversations, multilingual translation, and data analysis."
},
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": {
"description": "Llama 3.1 70B model is finely tuned for high-load applications, quantized to FP8 for enhanced computational efficiency and accuracy, ensuring outstanding performance in complex scenarios."
},
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": {
"description": "Llama 3.1 8B model utilizes FP8 quantization, supporting up to 131,072 context tokens, making it a standout in open-source models, excelling in complex tasks and outperforming many industry benchmarks."
},
"meta-llama/llama-3-70b-instruct": {
"description": "Llama 3 70B Instruct is optimized for high-quality conversational scenarios, demonstrating excellent performance in various human evaluations."
},
"meta-llama/llama-3-8b-instruct": {
"description": "Llama 3 8B Instruct is optimized for high-quality conversational scenarios, performing better than many closed-source models."
},
"meta-llama/llama-3.1-70b-instruct": {
"description": "Llama 3.1 70B Instruct is designed for high-quality conversations, excelling in human evaluations, particularly in highly interactive scenarios."
},
"meta-llama/llama-3.1-8b-instruct": {
"description": "Llama 3.1 8B Instruct is the latest version released by Meta, optimized for high-quality conversational scenarios, outperforming many leading closed-source models."
},
"meta-llama/llama-3.1-8b-instruct:free": {
"description": "LLaMA 3.1 offers multilingual support and is one of the industry's leading generative models."
},
"meta-llama/llama-3.2-11b-vision-instruct": {
"description": "LLaMA 3.2 is designed to handle tasks that combine visual and textual data. It excels in tasks such as image description and visual question answering, bridging the gap between language generation and visual reasoning."
},
"meta-llama/llama-3.2-3b-instruct": {
"description": "meta-llama/llama-3.2-3b-instruct"
},
"meta-llama/llama-3.2-90b-vision-instruct": {
"description": "LLaMA 3.2 is designed to handle tasks that combine visual and textual data. It excels in tasks such as image description and visual question answering, bridging the gap between language generation and visual reasoning."
},
"meta-llama/llama-3.3-70b-instruct": {
"description": "Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, offering performance comparable to a 405B model at an extremely low cost. Based on the Transformer architecture, it enhances usability and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on various industry benchmarks. Knowledge cutoff date is December 2023."
},
"meta-llama/llama-3.3-70b-instruct:free": {
"description": "Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, offering performance comparable to a 405B model at an extremely low cost. Based on the Transformer architecture, it enhances usability and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on various industry benchmarks. Knowledge cutoff date is December 2023."
},
"meta.llama3-1-405b-instruct-v1:0": {
"description": "Meta Llama 3.1 405B Instruct is the largest and most powerful model in the Llama 3.1 Instruct series. It is a highly advanced conversational reasoning and synthetic data generation model, which can also serve as a foundation for specialized continuous pre-training or fine-tuning in specific domains. The multilingual large language models (LLMs) provided by Llama 3.1 are a set of pre-trained, instruction-tuned generative models, including sizes of 8B, 70B, and 405B (text input/output). The instruction-tuned text models (8B, 70B, 405B) are optimized for multilingual conversational use cases and have outperformed many available open-source chat models in common industry benchmarks. Llama 3.1 is designed for commercial and research purposes across multiple languages. The instruction-tuned text models are suitable for assistant-like chat, while the pre-trained models can adapt to various natural language generation tasks. The Llama 3.1 models also support improving other models using their outputs, including synthetic data generation and refinement. Llama 3.1 is an autoregressive language model built using an optimized transformer architecture. The tuned versions utilize supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety."
},
"meta.llama3-1-70b-instruct-v1:0": {
"description": "The updated version of Meta Llama 3.1 70B Instruct includes an extended 128K context length, multilingual capabilities, and improved reasoning abilities. The multilingual large language models (LLMs) provided by Llama 3.1 are a set of pre-trained, instruction-tuned generative models, including sizes of 8B, 70B, and 405B (text input/output). The instruction-tuned text models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and have surpassed many available open-source chat models in common industry benchmarks. Llama 3.1 is designed for commercial and research purposes in multiple languages. The instruction-tuned text models are suitable for assistant-like chat, while the pre-trained models can adapt to various natural language generation tasks. The Llama 3.1 model also supports using its outputs to improve other models, including synthetic data generation and refinement. Llama 3.1 is an autoregressive language model using optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety."
},
"meta.llama3-1-8b-instruct-v1:0": {
"description": "The updated version of Meta Llama 3.1 8B Instruct includes an extended 128K context length, multilingual capabilities, and improved reasoning abilities. The multilingual large language models (LLMs) provided by Llama 3.1 are a set of pre-trained, instruction-tuned generative models, including sizes of 8B, 70B, and 405B (text input/output). The instruction-tuned text models (8B, 70B, 405B) are optimized for multilingual dialogue use cases and have surpassed many available open-source chat models in common industry benchmarks. Llama 3.1 is designed for commercial and research purposes in multiple languages. The instruction-tuned text models are suitable for assistant-like chat, while the pre-trained models can adapt to various natural language generation tasks. The Llama 3.1 model also supports using its outputs to improve other models, including synthetic data generation and refinement. Llama 3.1 is an autoregressive language model using optimized transformer architecture. The tuned versions use supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety."
},
"meta.llama3-70b-instruct-v1:0": {
"description": "Meta Llama 3 is an open large language model (LLM) aimed at developers, researchers, and enterprises, designed to help them build, experiment, and responsibly scale their generative AI ideas. As part of a foundational system for global community innovation, it is particularly suitable for content creation, conversational AI, language understanding, R&D, and enterprise applications."
},
"meta.llama3-8b-instruct-v1:0": {
"description": "Meta Llama 3 is an open large language model (LLM) aimed at developers, researchers, and enterprises, designed to help them build, experiment, and responsibly scale their generative AI ideas. As part of a foundational system for global community innovation, it is particularly suitable for those with limited computational power and resources, edge devices, and faster training times."
},
"meta/Llama-3.2-11B-Vision-Instruct": {
"description": "Exhibits excellent image reasoning capabilities on high-resolution images, suitable for visual understanding applications."
},
"meta/Llama-3.2-90B-Vision-Instruct": {
"description": "Advanced image reasoning capabilities designed for visual understanding agent applications."
},
"meta/Llama-3.3-70B-Instruct": {
"description": "Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, offering performance comparable to a 405B model at a very low cost. Based on the Transformer architecture, it is enhanced through supervised fine-tuning (SFT) and reinforcement learning with human feedback (RLHF) to improve usefulness and safety. Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on multiple industry benchmarks. Knowledge cutoff date: December 2023."
},
"meta/Meta-Llama-3-70B-Instruct": {
"description": "A powerful 70 billion parameter model excelling in reasoning, coding, and a wide range of language applications."
},
"meta/Meta-Llama-3-8B-Instruct": {
"description": "A versatile 8 billion parameter model optimized for dialogue and text generation tasks."
},
"meta/Meta-Llama-3.1-405B-Instruct": {
"description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
},
"meta/Meta-Llama-3.1-70B-Instruct": {
"description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
},
"meta/Meta-Llama-3.1-8B-Instruct": {
"description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
},
"meta/llama-3-70b": {
"description": "A 70 billion parameter open-source model finely tuned by Meta for instruction following. Served by Groq using its custom Language Processing Unit (LPU) hardware for fast, efficient inference."
},
"meta/llama-3-8b": {
"description": "An 8 billion parameter open-source model finely tuned by Meta for instruction following. Served by Groq using its custom Language Processing Unit (LPU) hardware for fast, efficient inference."
},
"meta/llama-3.1-405b-instruct": {
"description": "An advanced LLM supporting synthetic data generation, knowledge distillation, and reasoning, suitable for chatbots, programming, and domain-specific tasks."
},
"meta/llama-3.1-70b": {
"description": "An updated version of Meta Llama 3 70B Instruct, featuring extended 128K context length, multilingual support, and improved reasoning capabilities."
},
"meta/llama-3.1-70b-instruct": {
"description": "Empowering complex conversations with exceptional context understanding, reasoning capabilities, and text generation abilities."
},
"meta/llama-3.1-8b": {
"description": "Llama 3.1 8B supports a 128K context window, making it ideal for real-time conversational interfaces and data analysis, while offering significant cost savings compared to larger models. Served by Groq using its custom Language Processing Unit (LPU) hardware for fast, efficient inference."
},
"meta/llama-3.1-8b-instruct": {
"description": "An advanced cutting-edge model with language understanding, excellent reasoning capabilities, and text generation abilities."
},
"meta/llama-3.2-11b": {
"description": "Instruction-tuned image reasoning generation model (text + image input / text output), optimized for visual recognition, image reasoning, captioning, and answering general questions about images."
},
"meta/llama-3.2-11b-vision-instruct": {
"description": "A state-of-the-art vision-language model adept at high-quality reasoning from images."
},
"meta/llama-3.2-1b": {
"description": "Text-only model supporting on-device use cases such as multilingual local knowledge retrieval, summarization, and rewriting."
},
"meta/llama-3.2-1b-instruct": {
"description": "A cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
},
"meta/llama-3.2-3b": {
"description": "Text-only model carefully tuned to support on-device use cases such as multilingual local knowledge retrieval, summarization, and rewriting."
},
"meta/llama-3.2-3b-instruct": {
"description": "A cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
},
"meta/llama-3.2-90b": {
"description": "Instruction-tuned image reasoning generation model (text + image input / text output), optimized for visual recognition, image reasoning, captioning, and answering general questions about images."
},
"meta/llama-3.2-90b-vision-instruct": {
"description": "A state-of-the-art vision-language model adept at high-quality reasoning from images."
},
"meta/llama-3.3-70b": {
"description": "The perfect blend of performance and efficiency. This model supports high-performance conversational AI, designed for content creation, enterprise applications, and research, offering advanced language understanding capabilities including text summarization, classification, sentiment analysis, and code generation."
},
"meta/llama-3.3-70b-instruct": {
"description": "An advanced LLM skilled in reasoning, mathematics, common sense, and function calling."
},
"meta/llama-4-maverick": {
"description": "The Llama 4 model family consists of native multimodal AI models supporting text and multimodal experiences. These models leverage a Mixture of Experts architecture to deliver industry-leading performance in text and image understanding. Llama 4 Maverick, a 17 billion parameter model with 128 experts, is served by DeepInfra."
},
"meta/llama-4-scout": {
"description": "The Llama 4 model family consists of native multimodal AI models supporting text and multimodal experiences. These models leverage a Mixture of Experts architecture to deliver industry-leading performance in text and image understanding. Llama 4 Scout, a 17 billion parameter model with 16 experts, is served by DeepInfra."
},
"microsoft/Phi-3-medium-128k-instruct": {
"description": "The same Phi-3-medium model but with a larger context size, suitable for RAG or few-shot prompting."
},
"microsoft/Phi-3-medium-4k-instruct": {
"description": "A 14 billion parameter model with higher quality than Phi-3-mini, focusing on high-quality, reasoning-intensive data."
},
"microsoft/Phi-3-mini-128k-instruct": {
"description": "The same Phi-3-mini model but with a larger context size, suitable for RAG or few-shot prompting."
},
"microsoft/Phi-3-mini-4k-instruct": {
"description": "The smallest member of the Phi-3 family, optimized for quality and low latency."
},
"microsoft/Phi-3-small-128k-instruct": {
"description": "The same Phi-3-small model but with a larger context size, suitable for RAG or few-shot prompting."
},
"microsoft/Phi-3-small-8k-instruct": {
"description": "A 7 billion parameter model with higher quality than Phi-3-mini, focusing on high-quality, reasoning-intensive data."
},
"microsoft/Phi-3.5-mini-instruct": {
"description": "An updated version of the Phi-3-mini model."
},
"microsoft/Phi-3.5-vision-instruct": {
"description": "An updated version of the Phi-3-vision model."
},
"microsoft/WizardLM-2-8x22B": {
"description": "WizardLM 2 is a language model provided by Microsoft AI, excelling in complex dialogues, multilingual capabilities, reasoning, and intelligent assistant tasks."
},
"microsoft/wizardlm-2-8x22b": {
"description": "WizardLM-2 8x22B is Microsoft's state-of-the-art Wizard model, demonstrating extremely competitive performance."
},
"minicpm-v": {
"description": "MiniCPM-V is a next-generation multimodal large model launched by OpenBMB, boasting exceptional OCR recognition and multimodal understanding capabilities, supporting a wide range of application scenarios."
},
"ministral-3b-latest": {
"description": "Ministral 3B is Mistral's top-tier edge model."
},
"ministral-8b-latest": {
"description": "Ministral 8B is Mistral's cost-effective edge model."
},
"mistral": {
"description": "Mistral is a 7B model released by Mistral AI, suitable for diverse language processing needs."
},
"mistral-ai/Mistral-Large-2411": {
"description": "Mistral's flagship model, ideal for large-scale reasoning or highly specialized complex tasks such as synthetic text generation, code generation, RAG, or agents."
},
"mistral-ai/Mistral-Nemo": {
"description": "Mistral Nemo is a cutting-edge large language model (LLM) with state-of-the-art reasoning, world knowledge, and coding capabilities in its size category."
},
"mistral-ai/mistral-small-2503": {
"description": "Mistral Small is suitable for any language-based task requiring high efficiency and low latency."
},
"mistral-large": {
"description": "Mixtral Large is Mistral's flagship model, combining capabilities in code generation, mathematics, and reasoning, supporting a 128k context window."
},
"mistral-large-instruct": {
"description": "Mistral-Large-Instruct-2407 is an advanced dense large language model (LLM) with 123 billion parameters, featuring state-of-the-art reasoning, knowledge, and coding capabilities."
},
"mistral-large-latest": {
"description": "Mistral Large is the flagship model, excelling in multilingual tasks, complex reasoning, and code generation, making it an ideal choice for high-end applications."
},
"mistral-medium-latest": {
"description": "Mistral Medium 3 offers state-of-the-art performance at 8 times the cost, fundamentally simplifying enterprise deployment."
},
"mistral-nemo": {
"description": "Mistral Nemo, developed in collaboration with Mistral AI and NVIDIA, is a high-performance 12B model."
},
"mistral-nemo-instruct": {
"description": "Mistral-Nemo-Instruct-2407 is the instruction-tuned version of the Mistral-Nemo-Base-2407 large language model (LLM)."
},
"mistral-small": {
"description": "Mistral Small can be used for any language-based task that requires high efficiency and low latency."
},
"mistral-small-latest": {
"description": "Mistral Small is a cost-effective, fast, and reliable option suitable for use cases such as translation, summarization, and sentiment analysis."
},
"mistral/codestral": {
"description": "Mistral Codestral 25.01 is a state-of-the-art coding model optimized for low-latency, high-frequency use cases. Proficient in over 80 programming languages, it excels at fill-in-the-middle (FIM), code correction, and test generation tasks."
},
"mistral/codestral-embed": {
"description": "A code embedding model that can be embedded into code databases and repositories to support coding assistants."
},
"mistral/devstral-small": {
"description": "Devstral is an agent large language model for software engineering tasks, making it an excellent choice for software engineering agents."
},
"mistral/magistral-medium": {
"description": "Complex thinking supported by deep understanding, featuring transparent reasoning you can follow and verify. This model maintains high-fidelity reasoning across many languages, even when switching languages mid-task."
},
"mistral/magistral-small": {
"description": "Complex thinking supported by deep understanding, featuring transparent reasoning you can follow and verify. This model maintains high-fidelity reasoning across many languages, even when switching languages mid-task."
},
"mistral/ministral-3b": {
"description": "A compact, efficient model for on-device tasks such as intelligent assistants and local analytics, providing low-latency performance."
},
"mistral/ministral-8b": {
"description": "A more powerful model with faster, memory-efficient inference, ideal for complex workflows and demanding edge applications."
},
"mistral/mistral-embed": {
"description": "A general-purpose text embedding model for semantic search, similarity, clustering, and RAG workflows."
},
"mistral/mistral-large": {
"description": "Mistral Large is ideal for complex tasks requiring large-scale reasoning capabilities or high specialization—such as synthetic text generation, code generation, RAG, or agents."
},
"mistral/mistral-small": {
"description": "Mistral Small is ideal for simple tasks that can be batched—such as classification, customer support, or text generation. It delivers excellent performance at an affordable price point."
},
"mistral/mixtral-8x22b-instruct": {
"description": "8x22b Instruct model. 8x22b is a Mixture of Experts open-source model served by Mistral."
},
"mistral/pixtral-12b": {
"description": "A 12B model with image understanding capabilities as well as text."
},
"mistral/pixtral-large": {
"description": "Pixtral Large is the second model in our multimodal family, demonstrating cutting-edge image understanding. Specifically, it can comprehend documents, charts, and natural images while maintaining the leading text understanding capabilities of Mistral Large 2."
},
"mistralai/Mistral-7B-Instruct-v0.1": {
"description": "Mistral (7B) Instruct is known for its high performance, suitable for various language tasks."
},
"mistralai/Mistral-7B-Instruct-v0.2": {
"description": "Mistral 7B is a model fine-tuned on demand, providing optimized answers for tasks."
},
"mistralai/Mistral-7B-Instruct-v0.3": {
"description": "Mistral (7B) Instruct v0.3 offers efficient computational power and natural language understanding, suitable for a wide range of applications."
},
"mistralai/Mistral-7B-v0.1": {
"description": "Mistral 7B is a compact yet high-performance model, adept at handling batch processing and simple tasks like classification and text generation, featuring good reasoning capabilities."
},
"mistralai/Mixtral-8x22B-Instruct-v0.1": {
"description": "Mixtral-8x22B Instruct (141B) is a super large language model that supports extremely high processing demands."
},
"mistralai/Mixtral-8x7B-Instruct-v0.1": {
"description": "Mixtral 8x7B is a pre-trained sparse mixture of experts model for general text tasks."
},
"mistralai/Mixtral-8x7B-v0.1": {
"description": "Mixtral 8x7B is a sparse expert model that utilizes multiple parameters to enhance reasoning speed, suitable for multilingual and code generation tasks."
},
"mistralai/mistral-nemo": {
"description": "Mistral Nemo is a multilingual model with 7.3 billion parameters, designed for high-performance programming."
},
"mixtral": {
"description": "Mixtral is an expert model from Mistral AI, featuring open-source weights and providing support in code generation and language understanding."
},
"mixtral-8x7b-32768": {
"description": "Mixtral 8x7B provides high fault-tolerant parallel computing capabilities, suitable for complex tasks."
},
"mixtral:8x22b": {
"description": "Mixtral is an expert model from Mistral AI, featuring open-source weights and providing support in code generation and language understanding."
},
"moonshot-v1-128k": {
"description": "Moonshot V1 128K is a model with ultra-long context processing capabilities, suitable for generating extremely long texts, meeting the demands of complex generation tasks, capable of handling up to 128,000 tokens, making it ideal for research, academia, and large document generation."
},
"moonshot-v1-128k-vision-preview": {
"description": "The Kimi visual model (including moonshot-v1-8k-vision-preview, moonshot-v1-32k-vision-preview, moonshot-v1-128k-vision-preview, etc.) can understand image content, including text in images, colors, and shapes of objects."
},
"moonshot-v1-32k": {
"description": "Moonshot V1 32K offers medium-length context processing capabilities, able to handle 32,768 tokens, particularly suitable for generating various long documents and complex dialogues, applicable in content creation, report generation, and dialogue systems."
},
"moonshot-v1-32k-vision-preview": {
"description": "The Kimi visual model (including moonshot-v1-8k-vision-preview, moonshot-v1-32k-vision-preview, moonshot-v1-128k-vision-preview, etc.) can understand image content, including text in images, colors, and shapes of objects."
},
"moonshot-v1-8k": {
"description": "Moonshot V1 8K is designed for generating short text tasks, featuring efficient processing performance, capable of handling 8,192 tokens, making it ideal for brief dialogues, note-taking, and rapid content generation."
},
"moonshot-v1-8k-vision-preview": {
"description": "The Kimi visual model (including moonshot-v1-8k-vision-preview, moonshot-v1-32k-vision-preview, moonshot-v1-128k-vision-preview, etc.) can understand image content, including text in images, colors, and shapes of objects."
},
"moonshot-v1-auto": {
"description": "Moonshot V1 Auto can select the appropriate model based on the number of tokens used in the current context."
},
"moonshotai/Kimi-Dev-72B": {
"description": "Kimi-Dev-72B is an open-source large code model optimized through extensive reinforcement learning, capable of producing robust, production-ready patches. This model achieved a new high score of 60.4% on SWE-bench Verified, setting a record for open-source models in automated software engineering tasks such as defect repair and code review."
},
"moonshotai/Kimi-K2-Instruct-0905": {
"description": "Kimi K2-Instruct-0905 is the latest and most powerful version of Kimi K2. It is a top-tier Mixture of Experts (MoE) language model with a total of 1 trillion parameters and 32 billion activated parameters. Key features of this model include enhanced agent coding intelligence, demonstrating significant performance improvements in public benchmark tests and real-world agent coding tasks; and an improved frontend coding experience, with advancements in both aesthetics and practicality for frontend programming."
},
"moonshotai/kimi-k2": {
"description": "Kimi K2 is a large-scale Mixture of Experts (MoE) language model developed by Moonshot AI, with a total of 1 trillion parameters and 32 billion active parameters per forward pass. It is optimized for agent capabilities, including advanced tool use, reasoning, and code synthesis."
},
"moonshotai/kimi-k2-0905": {
"description": "The kimi-k2-0905-preview model has a context length of 256k, featuring stronger Agentic Coding capabilities, more outstanding aesthetics and practicality of frontend code, and better context understanding."
},
"moonshotai/kimi-k2-instruct-0905": {
"description": "The kimi-k2-0905-preview model has a context length of 256k, featuring stronger Agentic Coding capabilities, more outstanding aesthetics and practicality of frontend code, and better context understanding."
},
"morph/morph-v3-fast": {
"description": "Morph offers a specialized AI model that applies code changes suggested by cutting-edge models like Claude or GPT-4o to your existing code files FAST - 4500+ tokens/second. It acts as the final step in the AI coding workflow. Supports 16k input tokens and 16k output tokens."
},
"morph/morph-v3-large": {
"description": "Morph offers a specialized AI model that applies code changes suggested by cutting-edge models like Claude or GPT-4o to your existing code files FAST - 2500+ tokens/second. It acts as the final step in the AI coding workflow. Supports 16k input tokens and 16k output tokens."
},
"nousresearch/hermes-2-pro-llama-3-8b": {
"description": "Hermes 2 Pro Llama 3 8B is an upgraded version of Nous Hermes 2, featuring the latest internally developed datasets."
},
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": {
"description": "Llama 3.1 Nemotron 70B is a large language model customized by NVIDIA, designed to enhance the helpfulness of LLM-generated responses to user queries. The model has excelled in benchmark tests such as Arena Hard, AlpacaEval 2 LC, and GPT-4-Turbo MT-Bench, ranking first in all three automatic alignment benchmarks as of October 1, 2024. The model is trained using RLHF (specifically REINFORCE), Llama-3.1-Nemotron-70B-Reward, and HelpSteer2-Preference prompts based on the Llama-3.1-70B-Instruct model."
},
"nvidia/llama-3.1-nemotron-51b-instruct": {
"description": "A unique language model offering unparalleled accuracy and efficiency."
},
"nvidia/llama-3.1-nemotron-70b-instruct": {
"description": "Llama-3.1-Nemotron-70B-Instruct is a custom large language model by NVIDIA designed to enhance the helpfulness of LLM-generated responses."
},
"o1": {
"description": "Focused on advanced reasoning and solving complex problems, including mathematical and scientific tasks. It is particularly well-suited for applications that require deep contextual understanding and agent workflow."
},
"o1-mini": {
"description": "o1-mini is a fast and cost-effective reasoning model designed for programming, mathematics, and scientific applications. This model features a 128K context and has a knowledge cutoff date of October 2023."
},
"o1-preview": {
"description": "Focused on advanced reasoning and solving complex problems, including mathematical and scientific tasks. Ideal for applications that require deep contextual understanding and autonomous workflows."
},
"o1-pro": {
"description": "The o1 series models are trained with reinforcement learning to think before answering and perform complex reasoning tasks. The o1-pro model uses more computational resources for deeper thinking, consistently delivering higher-quality responses."
},
"o3": {
"description": "o3 is a versatile and powerful model that excels across multiple domains. It sets new benchmarks for tasks in mathematics, science, programming, and visual reasoning. It is also skilled in technical writing and instruction following, allowing users to analyze text, code, and images to solve complex multi-step problems."
},
"o3-2025-04-16": {
"description": "o3 is OpenAI's new reasoning model, supporting text and image inputs with text outputs, suitable for complex tasks requiring broad general knowledge."
},
"o3-deep-research": {
"description": "o3-deep-research is our most advanced deep research model, specifically designed to handle complex multi-step research tasks. It can search and synthesize information from the internet, as well as access and utilize your proprietary data through the MCP connector."
},
"o3-mini": {
"description": "o3-mini is our latest small inference model that delivers high intelligence while maintaining the same cost and latency targets as o1-mini."
},
"o3-pro": {
"description": "The o3-pro model employs greater computational power for deeper thinking and consistently provides better answers. It is only supported under the Responses API."
},
"o3-pro-2025-06-10": {
"description": "o3 Pro is OpenAI's new reasoning model, supporting text and image inputs with text outputs, designed for complex tasks requiring extensive general knowledge."
},
"o4-mini": {
"description": "o4-mini is our latest small model in the o series. It is optimized for fast and efficient inference, demonstrating high efficiency and performance in coding and visual tasks."
},
"o4-mini-2025-04-16": {
"description": "o4-mini is OpenAI's reasoning model supporting text and image inputs with text outputs, suitable for complex tasks requiring broad general knowledge. This model features a 200K token context window."
},
"o4-mini-deep-research": {
"description": "o4-mini-deep-research is our faster and more affordable deep research model—ideal for tackling complex multi-step research tasks. It can search and synthesize information from the internet, as well as access and utilize your proprietary data through the MCP connector."
},
"open-codestral-mamba": {
"description": "Codestral Mamba is a language model focused on code generation, providing strong support for advanced coding and reasoning tasks."
},
"open-mistral-7b": {
"description": "Mistral 7B is a compact yet high-performance model, excelling in batch processing and simple tasks such as classification and text generation, with good reasoning capabilities."
},
"open-mistral-nemo": {
"description": "Mistral Nemo is a 12B model developed in collaboration with Nvidia, offering outstanding reasoning and coding performance, easy to integrate and replace."
},
"open-mixtral-8x22b": {
"description": "Mixtral 8x22B is a larger expert model focused on complex tasks, providing excellent reasoning capabilities and higher throughput."
},
"open-mixtral-8x7b": {
"description": "Mixtral 8x7B is a sparse expert model that leverages multiple parameters to enhance reasoning speed, suitable for handling multilingual and code generation tasks."
},
"openai/gpt-3.5-turbo": {
"description": "OpenAI's most capable and cost-effective model in the GPT-3.5 series, optimized for chat purposes but also performing well on traditional completion tasks."
},
"openai/gpt-3.5-turbo-instruct": {
"description": "Capabilities similar to GPT-3 era models. Compatible with traditional completion endpoints rather than chat completion endpoints."
},
"openai/gpt-4-turbo": {
"description": "OpenAI's gpt-4-turbo features broad general knowledge and domain expertise, enabling it to follow complex natural language instructions and accurately solve difficult problems. Its knowledge cutoff is April 2023, with a 128,000 token context window."
},
"openai/gpt-4.1": {
"description": "GPT 4.1 is OpenAI's flagship model, suited for complex tasks. It excels at cross-domain problem solving."
},
"openai/gpt-4.1-mini": {
"description": "GPT 4.1 mini balances intelligence, speed, and cost, making it an attractive model for many use cases."
},
"openai/gpt-4.1-nano": {
"description": "GPT-4.1 nano is the fastest and most cost-effective GPT 4.1 model."
},
"openai/gpt-4o": {
"description": "GPT-4o from OpenAI has broad general knowledge and domain expertise, capable of following complex natural language instructions and accurately solving challenging problems. It matches GPT-4 Turbo's performance with a faster, cheaper API."
},
"openai/gpt-4o-mini": {
"description": "GPT-4o mini from OpenAI is their most advanced and cost-effective small model. It is multimodal (accepting text or image inputs and outputting text) and more intelligent than gpt-3.5-turbo, while maintaining similar speed."
},
"openai/gpt-5": {
"description": "GPT-5 is OpenAI's flagship language model, excelling in complex reasoning, extensive real-world knowledge, code-intensive, and multi-step agent tasks."
},
"openai/gpt-5-mini": {
"description": "GPT-5 mini is a cost-optimized model performing well on reasoning/chat tasks. It offers the best balance of speed, cost, and capability."
},
"openai/gpt-5-nano": {
"description": "GPT-5 nano is a high-throughput model excelling at simple instruction or classification tasks."
},
"openai/gpt-oss-120b": {
"description": "An extremely capable general-purpose large language model with powerful, controllable reasoning abilities."
},
"openai/gpt-oss-20b": {
"description": "A compact, open-source weighted language model optimized for low latency and resource-constrained environments, including local and edge deployments."
},
"openai/o1": {
"description": "OpenAI's o1 is a flagship reasoning model designed for complex problems requiring deep thought. It provides strong reasoning capabilities and higher accuracy for complex multi-step tasks."
},
"openai/o1-mini": {
"description": "o1-mini is a fast and cost-effective reasoning model designed for programming, mathematics, and scientific applications. This model features a 128K context and has a knowledge cutoff date of October 2023."
},
"openai/o1-preview": {
"description": "o1 is OpenAI's new reasoning model, suitable for complex tasks that require extensive general knowledge. This model features a 128K context and has a knowledge cutoff date of October 2023."
},
"openai/o3": {
"description": "OpenAI's o3 is the most powerful reasoning model, setting new state-of-the-art levels in coding, mathematics, science, and visual perception. It excels at complex queries requiring multifaceted analysis, with special strengths in analyzing images, charts, and graphs."
},
"openai/o3-mini": {
"description": "o3-mini is OpenAI's latest small reasoning model, delivering high intelligence at the same cost and latency targets as o1-mini."
},
"openai/o3-mini-high": {
"description": "O3-mini high inference level version provides high intelligence at the same cost and latency targets as o1-mini."
},
"openai/o4-mini": {
"description": "OpenAI's o4-mini offers fast, cost-effective reasoning with excellent performance for its size, especially in mathematics (best in AIME benchmark), coding, and visual tasks."
},
"openai/o4-mini-high": {
"description": "o4-mini high inference level version, optimized for fast and efficient inference, demonstrating high efficiency and performance in coding and visual tasks."
},
"openai/text-embedding-3-large": {
"description": "OpenAI's most capable embedding model, suitable for English and non-English tasks."
},
"openai/text-embedding-3-small": {
"description": "OpenAI's improved, higher-performance version of the ada embedding model."
},
"openai/text-embedding-ada-002": {
"description": "OpenAI's traditional text embedding model."
},
"openrouter/auto": {
"description": "Based on context length, topic, and complexity, your request will be sent to Llama 3 70B Instruct, Claude 3.5 Sonnet (self-regulating), or GPT-4o."
},
"perplexity/sonar": {
"description": "Perplexity's lightweight product with search grounding capabilities, faster and cheaper than Sonar Pro."
},
"perplexity/sonar-pro": {
"description": "Perplexity's flagship product with search grounding capabilities, supporting advanced queries and follow-up actions."
},
"perplexity/sonar-reasoning": {
"description": "A reasoning-focused model that outputs chain-of-thought (CoT) in responses, providing detailed explanations with search grounding."
},
"perplexity/sonar-reasoning-pro": {
"description": "An advanced reasoning-focused model that outputs chain-of-thought (CoT) in responses, offering comprehensive explanations with enhanced search capabilities and multiple search queries per request."
},
"phi3": {
"description": "Phi-3 is a lightweight open model launched by Microsoft, suitable for efficient integration and large-scale knowledge reasoning."
},
"phi3:14b": {
"description": "Phi-3 is a lightweight open model launched by Microsoft, suitable for efficient integration and large-scale knowledge reasoning."
},
"pixtral-12b-2409": {
"description": "The Pixtral model demonstrates strong capabilities in tasks such as chart and image understanding, document question answering, multimodal reasoning, and instruction following. It can ingest images at natural resolutions and aspect ratios and handle an arbitrary number of images within a long context window of up to 128K tokens."
},
"pixtral-large-latest": {
"description": "Pixtral Large is an open-source multimodal model with 124 billion parameters, built on Mistral Large 2. This is the second model in our multimodal family, showcasing cutting-edge image understanding capabilities."
},
"pro-128k": {
"description": "Spark Pro 128K is equipped with an extra-large context processing capability, able to handle up to 128K of contextual information, making it particularly suitable for long-form content that requires comprehensive analysis and long-term logical connections, providing smooth and consistent logic and diverse citation support in complex text communication."
},
"pro-deepseek-r1": {
"description": "Enterprise-exclusive service model with concurrent service support."
},
"pro-deepseek-v3": {
"description": "Enterprise-exclusive service model with concurrent service support."
},
"qvq-72b-preview": {
"description": "The QVQ model is an experimental research model developed by the Qwen team, focusing on enhancing visual reasoning capabilities, particularly in the field of mathematical reasoning."
},
"qvq-max": {
"description": "Tongyi Qianwen QVQ visual reasoning model supports visual input and chain-of-thought output, demonstrating stronger capabilities in mathematics, programming, visual analysis, creation, and general tasks."
},
"qvq-plus": {
"description": "A visual reasoning model supporting visual inputs and chain-of-thought outputs. The plus version, succeeding the qvq-max model, offers faster reasoning speed and a more balanced trade-off between performance and cost."
},
"qwen-3-32b": {
"description": "Qwen 3 32B: A strong performer in multilingual and coding tasks, suitable for medium-scale production use."
},
"qwen-3-coder-480b": {
"description": "Qwen 3 Coder 480B: A long-context model designed for code generation and complex programming tasks."
},
"qwen-coder-plus": {
"description": "Tongyi Qianwen coding model."
},
"qwen-coder-turbo": {
"description": "Tongyi Qianwen coding model."
},
"qwen-coder-turbo-latest": {
"description": "The Tongyi Qianwen Coder model."
},
"qwen-flash": {
"description": "The Tongyi Qianwen Flash series offers the fastest, most cost-effective models, suitable for simple tasks."
},
"qwen-image": {
"description": "Qwen-Image is a general-purpose image generation model that supports a wide range of artistic styles and is particularly adept at rendering complex text, especially Chinese and English. The model supports multi-line layouts, paragraph-level text generation, and fine-grained detail rendering, enabling complex mixed text-and-image layout designs."
},
"qwen-image-edit": {
"description": "Qwen Image Edit is an image-to-image model that supports editing and modifying images based on input images and text prompts, enabling precise adjustments and creative transformations of the original image according to user needs."
},
"qwen-long": {
"description": "Qwen is a large-scale language model that supports long text contexts and dialogue capabilities based on long documents and multiple documents."
},
"qwen-math-plus": {
"description": "Tongyi Qianwen math model specialized for solving mathematical problems."
},
"qwen-math-plus-latest": {
"description": "The Tongyi Qianwen Math model is specifically designed for solving mathematical problems."
},
"qwen-math-turbo": {
"description": "Tongyi Qianwen math model specialized for solving mathematical problems."
},
"qwen-math-turbo-latest": {
"description": "The Tongyi Qianwen Math model is specifically designed for solving mathematical problems."
},
"qwen-max": {
"description": "Qwen Max is a trillion-level large-scale language model that supports input in various languages including Chinese and English, and is the API model behind the current Qwen 2.5 product version."
},
"qwen-omni-turbo": {
"description": "Qwen-Omni series models support multi-modal inputs including video, audio, images, and text, and output audio and text."
},
"qwen-plus": {
"description": "Qwen Plus is an enhanced large-scale language model supporting input in various languages including Chinese and English."
},
"qwen-turbo": {
"description": "Tongyi Qianwen Turbo will no longer receive updates; it is recommended to switch to Tongyi Qianwen Flash. Tongyi Qianwen is an ultra-large language model that supports input in Chinese, English, and other languages."
},
"qwen-vl-chat-v1": {
"description": "Qwen VL supports flexible interaction methods, including multi-image, multi-turn Q&A, and creative capabilities."
},
"qwen-vl-max": {
"description": "Tongyi Qianwen ultra-large-scale vision-language model. Compared to the enhanced version, it further improves visual reasoning and instruction compliance, providing higher levels of visual perception and cognition."
},
"qwen-vl-max-latest": {
"description": "Tongyi Qianwen's ultra-large-scale visual language model. Compared to the enhanced version, it further improves visual reasoning and instruction-following abilities, providing a higher level of visual perception and cognition."
},
"qwen-vl-ocr": {
"description": "Tongyi Qianwen OCR is a dedicated text extraction model focusing on documents, tables, exam questions, handwritten text, and other image types. It can recognize multiple languages currently supported: Chinese, English, French, Japanese, Korean, German, Russian, Italian, Vietnamese, and Arabic."
},
"qwen-vl-plus": {
"description": "Enhanced version of Tongyi Qianwen large-scale vision-language model. Greatly improves detail recognition and text recognition capabilities, supporting images with resolutions over one million pixels and arbitrary aspect ratios."
},
"qwen-vl-plus-latest": {
"description": "Tongyi Qianwen's large-scale visual language model enhanced version. Significantly improves detail recognition and text recognition capabilities, supporting ultra-high pixel resolution and images of any aspect ratio."
},
"qwen-vl-v1": {
"description": "Initialized with the Qwen-7B language model, this pre-trained model adds an image model with an input resolution of 448."
},
"qwen/qwen-2-7b-instruct": {
"description": "Qwen2 is a brand new series of large language models. Qwen2 7B is a transformer-based model that excels in language understanding, multilingual capabilities, programming, mathematics, and reasoning."
},
"qwen/qwen-2-7b-instruct:free": {
"description": "Qwen2 is a brand new series of large language models with enhanced understanding and generation capabilities."
},
"qwen/qwen-2-vl-72b-instruct": {
"description": "Qwen2-VL is the latest iteration of the Qwen-VL model, achieving state-of-the-art performance in visual understanding benchmarks, including MathVista, DocVQA, RealWorldQA, and MTVQA. Qwen2-VL can understand videos over 20 minutes long for high-quality video-based Q&A, dialogue, and content creation. It also possesses complex reasoning and decision-making capabilities, allowing integration with mobile devices, robots, and more for automated operations based on visual environments and text instructions. In addition to English and Chinese, Qwen2-VL now supports understanding text in different languages within images, including most European languages, Japanese, Korean, Arabic, and Vietnamese."
},
"qwen/qwen-2.5-72b-instruct": {
"description": "Qwen2.5-72B-Instruct is one of the latest large language model series released by Alibaba Cloud. This 72B model has significantly improved capabilities in coding and mathematics. The model also offers multilingual support, covering over 29 languages, including Chinese and English. It shows significant enhancements in instruction following, understanding structured data, and generating structured outputs (especially JSON)."
},
"qwen/qwen2.5-32b-instruct": {
"description": "Qwen2.5-32B-Instruct is one of the latest large language model series released by Alibaba Cloud. This 32B model has significantly improved capabilities in coding and mathematics. The model provides multilingual support, covering over 29 languages, including Chinese and English. It shows significant enhancements in instruction following, understanding structured data, and generating structured outputs (especially JSON)."
},
"qwen/qwen2.5-7b-instruct": {
"description": "An LLM focused on both Chinese and English, targeting language, programming, mathematics, reasoning, and more."
},
"qwen/qwen2.5-coder-32b-instruct": {
"description": "An advanced LLM supporting code generation, reasoning, and debugging, covering mainstream programming languages."
},
"qwen/qwen2.5-coder-7b-instruct": {
"description": "A powerful medium-sized code model supporting 32K context length, proficient in multilingual programming."
},
"qwen/qwen3-14b": {
"description": "Qwen3-14B is a dense 14.8 billion parameter causal language model in the Qwen3 series, designed for complex reasoning and efficient dialogue. It supports seamless switching between a 'thinking' mode for tasks such as mathematics, programming, and logical reasoning, and a 'non-thinking' mode for general conversation. This model is fine-tuned for instruction following, agent tool usage, creative writing, and multilingual tasks across more than 100 languages and dialects. It natively handles a 32K token context and can be extended to 131K tokens using YaRN."
},
"qwen/qwen3-14b:free": {
"description": "Qwen3-14B is a dense 14.8 billion parameter causal language model in the Qwen3 series, designed for complex reasoning and efficient dialogue. It supports seamless switching between a 'thinking' mode for tasks such as mathematics, programming, and logical reasoning, and a 'non-thinking' mode for general conversation. This model is fine-tuned for instruction following, agent tool usage, creative writing, and multilingual tasks across more than 100 languages and dialects. It natively handles a 32K token context and can be extended to 131K tokens using YaRN."
},
"qwen/qwen3-235b-a22b": {
"description": "Qwen3-235B-A22B is a 235 billion parameter mixture of experts (MoE) model developed by Qwen, activating 22 billion parameters per forward pass. It supports seamless switching between a 'thinking' mode for complex reasoning, mathematics, and coding tasks, and a 'non-thinking' mode for general conversational efficiency. This model showcases strong reasoning capabilities, multilingual support (over 100 languages and dialects), advanced instruction following, and agent tool invocation capabilities. It natively handles a 32K token context window and can be extended to 131K tokens using YaRN."
},
"qwen/qwen3-235b-a22b:free": {
"description": "Qwen3-235B-A22B is a 235 billion parameter mixture of experts (MoE) model developed by Qwen, activating 22 billion parameters per forward pass. It supports seamless switching between a 'thinking' mode for complex reasoning, mathematics, and coding tasks, and a 'non-thinking' mode for general conversational efficiency. This model showcases strong reasoning capabilities, multilingual support (over 100 languages and dialects), advanced instruction following, and agent tool invocation capabilities. It natively handles a 32K token context window and can be extended to 131K tokens using YaRN."
},
"qwen/qwen3-30b-a3b": {
"description": "Qwen3 is the latest generation in the Qwen large language model series, featuring a dense and mixture of experts (MoE) architecture that excels in reasoning, multilingual support, and advanced agent tasks. Its unique ability to seamlessly switch between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile and high-quality performance.\n\nQwen3 significantly outperforms previous models such as QwQ and Qwen2.5, offering exceptional capabilities in mathematics, coding, common sense reasoning, creative writing, and interactive dialogue. The Qwen3-30B-A3B variant contains 30.5 billion parameters (3.3 billion active parameters), 48 layers, 128 experts (activating 8 for each task), and supports up to 131K token context (using YaRN), setting a new standard for open-source models."
},
"qwen/qwen3-30b-a3b:free": {
"description": "Qwen3 is the latest generation in the Qwen large language model series, featuring a dense and mixture of experts (MoE) architecture that excels in reasoning, multilingual support, and advanced agent tasks. Its unique ability to seamlessly switch between a thinking mode for complex reasoning and a non-thinking mode for efficient dialogue ensures versatile and high-quality performance.\n\nQwen3 significantly outperforms previous models such as QwQ and Qwen2.5, offering exceptional capabilities in mathematics, coding, common sense reasoning, creative writing, and interactive dialogue. The Qwen3-30B-A3B variant contains 30.5 billion parameters (3.3 billion active parameters), 48 layers, 128 experts (activating 8 for each task), and supports up to 131K token context (using YaRN), setting a new standard for open-source models."
},
"qwen/qwen3-32b": {
"description": "Qwen3-32B is a dense 32.8 billion parameter causal language model in the Qwen3 series, optimized for complex reasoning and efficient dialogue. It supports seamless switching between a 'thinking' mode for tasks such as mathematics, coding, and logical reasoning, and a 'non-thinking' mode for faster, general conversation. This model demonstrates strong performance in instruction following, agent tool usage, creative writing, and multilingual tasks across more than 100 languages and dialects. It natively handles a 32K token context and can be extended to 131K tokens using YaRN."
},
"qwen/qwen3-32b:free": {
"description": "Qwen3-32B is a dense 32.8 billion parameter causal language model in the Qwen3 series, optimized for complex reasoning and efficient dialogue. It supports seamless switching between a 'thinking' mode for tasks such as mathematics, coding, and logical reasoning, and a 'non-thinking' mode for faster, general conversation. This model demonstrates strong performance in instruction following, agent tool usage, creative writing, and multilingual tasks across more than 100 languages and dialects. It natively handles a 32K token context and can be extended to 131K tokens using YaRN."
},
"qwen/qwen3-8b:free": {
"description": "Qwen3-8B is a dense 8.2 billion parameter causal language model in the Qwen3 series, designed for reasoning-intensive tasks and efficient dialogue. It supports seamless switching between a 'thinking' mode for mathematics, coding, and logical reasoning, and a 'non-thinking' mode for general conversation. This model is fine-tuned for instruction following, agent integration, creative writing, and multilingual use across more than 100 languages and dialects. It natively supports a 32K token context window and can be extended to 131K tokens via YaRN."
},
"qwen2": {
"description": "Qwen2 is Alibaba's next-generation large-scale language model, supporting diverse application needs with excellent performance."
},
"qwen2.5": {
"description": "Qwen2.5 is Alibaba's next-generation large-scale language model, supporting diverse application needs with outstanding performance."
},
"qwen2.5-14b-instruct": {
"description": "The 14B model of Tongyi Qianwen 2.5 is open-sourced."
},
"qwen2.5-14b-instruct-1m": {
"description": "The Tongyi Qianwen 2.5 model is open-sourced at a scale of 72B."
},
"qwen2.5-32b-instruct": {
"description": "The 32B model of Tongyi Qianwen 2.5 is open-sourced."
},
"qwen2.5-72b-instruct": {
"description": "The 72B model of Tongyi Qianwen 2.5 is open-sourced."
},
"qwen2.5-7b-instruct": {
"description": "The 7B model of Tongyi Qianwen 2.5 is open-sourced."
},
"qwen2.5-coder-1.5b-instruct": {
"description": "Open-source version of the Qwen coding model."
},
"qwen2.5-coder-14b-instruct": {
"description": "Open-source version of Tongyi Qianwen coding model."
},
"qwen2.5-coder-32b-instruct": {
"description": "Open-source version of the Tongyi Qianwen code model."
},
"qwen2.5-coder-7b-instruct": {
"description": "The open-source version of the Tongyi Qianwen Coder model."
},
"qwen2.5-coder-instruct": {
"description": "Qwen2.5-Coder is the latest code-specific large language model in the Qwen series (formerly known as CodeQwen)."
},
"qwen2.5-instruct": {
"description": "Qwen2.5 is the latest series of Qwen large language models. For Qwen2.5, we have released multiple base language models and instruction-tuned language models with parameter sizes ranging from 0.5 billion to 7.2 billion."
},
"qwen2.5-math-1.5b-instruct": {
"description": "Qwen-Math model has powerful mathematical problem-solving capabilities."
},
"qwen2.5-math-72b-instruct": {
"description": "The Qwen-Math model possesses strong capabilities for solving mathematical problems."
},
"qwen2.5-math-7b-instruct": {
"description": "The Qwen-Math model possesses strong capabilities for solving mathematical problems."
},
"qwen2.5-omni-7b": {
"description": "The Qwen-Omni series models support input of various modalities, including video, audio, images, and text, and output audio and text."
},
"qwen2.5-vl-32b-instruct": {
"description": "The Qwen2.5-VL model series enhances the model's intelligence level, practicality, and applicability, delivering superior performance in scenarios such as natural conversations, content creation, professional knowledge services, and code development. The 32B version employs reinforcement learning techniques to optimize the model, offering more human-preferred output styles, enhanced reasoning capabilities for complex mathematical problems, and fine-grained image understanding and reasoning compared to other models in the Qwen2.5-VL series."
},
"qwen2.5-vl-72b-instruct": {
"description": "This version enhances instruction following, mathematics, problem-solving, and coding capabilities, improving the ability to recognize various formats and accurately locate visual elements. It supports understanding long video files (up to 10 minutes) and pinpointing events in seconds, comprehending the sequence and speed of time, and based on parsing and locating capabilities, it supports controlling OS or Mobile agents. It has strong key information extraction and JSON output capabilities, and this version is the most powerful in the series at 72B."
},
"qwen2.5-vl-7b-instruct": {
"description": "This version enhances instruction following, mathematics, problem-solving, and coding capabilities, improving the ability to recognize various formats and accurately locate visual elements. It supports understanding long video files (up to 10 minutes) and pinpointing events in seconds, comprehending the sequence and speed of time, and based on parsing and locating capabilities, it supports controlling OS or Mobile agents. It has strong key information extraction and JSON output capabilities, and this version is the most powerful in the series at 72B."
},
"qwen2.5-vl-instruct": {
"description": "Qwen2.5-VL is the latest version of the visual language model in the Qwen model family."
},
"qwen2.5:0.5b": {
"description": "Qwen2.5 is Alibaba's next-generation large-scale language model, supporting diverse application needs with outstanding performance."
},
"qwen2.5:1.5b": {
"description": "Qwen2.5 is Alibaba's next-generation large-scale language model, supporting diverse application needs with outstanding performance."
},
"qwen2.5:72b": {
"description": "Qwen2.5 is Alibaba's next-generation large-scale language model, supporting diverse application needs with outstanding performance."
},
"qwen2:0.5b": {
"description": "Qwen2 is Alibaba's next-generation large-scale language model, supporting diverse application needs with excellent performance."
},
"qwen2:1.5b": {
"description": "Qwen2 is Alibaba's next-generation large-scale language model, supporting diverse application needs with excellent performance."
},
"qwen2:72b": {
"description": "Qwen2 is Alibaba's next-generation large-scale language model, supporting diverse application needs with excellent performance."
},
"qwen3": {
"description": "Qwen3 is Alibaba's next-generation large-scale language model, designed to support diverse application needs with outstanding performance."
},
"qwen3-0.6b": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
},
"qwen3-1.7b": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
},
"qwen3-14b": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
},
"qwen3-235b-a22b": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
},
"qwen3-235b-a22b-instruct-2507": {
"description": "An open-source non-thinking mode model based on Qwen3, with slight improvements in subjective creativity and model safety compared to the previous version (Tongyi Qianwen 3-235B-A22B)."
},
"qwen3-235b-a22b-thinking-2507": {
"description": "An open-source thinking mode model based on Qwen3, with significant improvements in logical ability, general capabilities, knowledge enhancement, and creativity compared to the previous version (Tongyi Qianwen 3-235B-A22B), suitable for high-difficulty and strong reasoning scenarios."
},
"qwen3-30b-a3b": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
},
"qwen3-30b-a3b-instruct-2507": {
"description": "Compared to the previous version (Qwen3-30B-A3B), this version shows substantial improvements in overall general capabilities in both Chinese and multilingual contexts. It features specialized optimizations for subjective and open-ended tasks, aligning significantly better with user preferences and providing more helpful responses."
},
"qwen3-30b-a3b-thinking-2507": {
"description": "An open-source thinking mode model based on Qwen3, this version shows significant enhancements over the previous release (Tongyi Qianwen 3-30B-A3B) in logical ability, general capability, knowledge augmentation, and creative capacity. It is suitable for challenging scenarios requiring strong reasoning."
},
"qwen3-32b": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
},
"qwen3-4b": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
},
"qwen3-8b": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functionality, and multilingual support, while also supporting mode switching."
},
"qwen3-coder-30b-a3b-instruct": {
"description": "The open-source version of the Tongyi Qianwen code model. The latest qwen3-coder-30b-a3b-instruct is a code generation model based on Qwen3, featuring powerful Coding Agent capabilities. It excels at tool usage and environment interaction, enabling autonomous programming with outstanding coding and general abilities."
},
"qwen3-coder-480b-a35b-instruct": {
"description": "Open-source version of Tongyi Qianwen's code model. The latest qwen3-coder-480b-a35b-instruct is a code generation model based on Qwen3, featuring powerful Coding Agent capabilities, proficient in tool invocation and environment interaction, enabling autonomous programming with excellent coding and general capabilities."
},
"qwen3-coder-flash": {
"description": "Tongyi Qianwen code model. The latest Qwen3-Coder series models are code generation models based on Qwen3, equipped with powerful Coding Agent capabilities, proficient in tool invocation and environment interaction, enabling autonomous programming with excellent coding skills alongside general capabilities."
},
"qwen3-coder-plus": {
"description": "Tongyi Qianwen code model. The latest Qwen3-Coder series models are code generation models based on Qwen3, equipped with powerful Coding Agent capabilities, proficient in tool invocation and environment interaction, enabling autonomous programming with excellent coding skills alongside general capabilities."
},
"qwen3-coder:480b": {
"description": "Alibaba's high-performance long-context model tailored for agent and coding tasks."
},
"qwen3-max": {
"description": "Tongyi Qianwen 3 series Max model, which shows significant overall improvements compared to the 2.5 series, including enhanced bilingual (Chinese and English) text understanding, complex instruction following, subjective open-domain task capabilities, multilingual support, and tool invocation abilities; the model also exhibits fewer hallucinations. The latest qwen3-max model features specialized upgrades in agent programming and tool invocation compared to the qwen3-max-preview version. The officially released model achieves state-of-the-art performance in its domain and is adapted to more complex agent scenarios."
},
"qwen3-next-80b-a3b-instruct": {
"description": "A new generation of non-thinking mode open-source model based on Qwen3. Compared to the previous version (Tongyi Qianwen 3-235B-A22B-Instruct-2507), it offers better Chinese text comprehension, enhanced logical reasoning abilities, and improved performance in text generation tasks."
},
"qwen3-next-80b-a3b-thinking": {
"description": "A new generation of thinking mode open-source model based on Qwen3. Compared to the previous version (Tongyi Qianwen 3-235B-A22B-Thinking-2507), it features improved instruction-following capabilities and more concise model-generated summaries."
},
"qwen3-omni-flash": {
"description": "The Qwen-Omni model accepts multimodal input including text, images, audio, and video, and generates responses in text or speech. It offers a variety of human-like voice tones, supports multilingual and dialectal speech output, and is applicable to scenarios such as text creation, visual recognition, and voice assistants."
},
"qwen3-vl-235b-a22b-instruct": {
"description": "Qwen3 VL 235B A22B Instruct Mode (non-thinking), designed for instruction-following scenarios without deep reasoning, while maintaining strong visual understanding capabilities."
},
"qwen3-vl-235b-a22b-thinking": {
"description": "Qwen3 VL 235B A22B Thinking Mode (open-source version), tailored for complex reasoning and long video understanding tasks, offering top-tier visual and textual reasoning performance."
},
"qwen3-vl-30b-a3b-instruct": {
"description": "Qwen3 VL 30B Instruct Mode (non-thinking), designed for general instruction-following scenarios, with strong multimodal understanding and generation capabilities."
},
"qwen3-vl-30b-a3b-thinking": {
"description": "The open-source Qwen-VL model provides visual understanding and text generation capabilities. It supports agent interaction, visual encoding, spatial awareness, long video comprehension, and deep reasoning, with enhanced text recognition and multilingual support in complex scenarios."
},
"qwen3-vl-8b-instruct": {
"description": "Qwen3 VL 8B Instruct Mode (non-thinking), suitable for standard multimodal generation and recognition tasks."
},
"qwen3-vl-8b-thinking": {
"description": "Qwen3 VL 8B Thinking Mode, designed for lightweight multimodal reasoning and interaction scenarios, while retaining long-context understanding capabilities."
},
"qwen3-vl-flash": {
"description": "Qwen3 VL Flash: a lightweight, high-speed inference version ideal for latency-sensitive or high-volume request scenarios."
},
"qwen3-vl-plus": {
"description": "Tongyi Qianwen VL is a text generation model with visual (image) understanding capabilities. It can perform OCR (image text recognition) and further summarize and reason, such as extracting attributes from product photos or solving problems based on exercise images."
},
"qwq": {
"description": "QwQ is an experimental research model focused on improving AI reasoning capabilities."
},
"qwq-32b": {
"description": "The QwQ inference model is trained based on the Qwen2.5-32B model, significantly enhancing its reasoning capabilities through reinforcement learning. The core metrics of the model, including mathematical code (AIME 24/25, LiveCodeBench) and some general metrics (IFEval, LiveBench, etc.), reach the level of the full version of DeepSeek-R1, with all metrics significantly surpassing those of DeepSeek-R1-Distill-Qwen-32B, which is also based on Qwen2.5-32B."
},
"qwq-32b-preview": {
"description": "The QwQ model is an experimental research model developed by the Qwen team, focusing on enhancing AI reasoning capabilities."
},
"qwq-plus": {
"description": "QwQ reasoning model trained based on Qwen2.5, significantly enhancing reasoning ability through reinforcement learning. Core metrics in mathematics and coding (AIME 24/25, LiveCodeBench) and some general benchmarks (IFEval, LiveBench, etc.) reach the full-power level of DeepSeek-R1."
},
"qwq_32b": {
"description": "A medium-sized reasoning model in the Qwen series. Compared to traditional instruction-tuned models, QwQ, with its thinking and reasoning capabilities, significantly enhances performance in downstream tasks, especially in solving challenging problems."
},
"r1-1776": {
"description": "R1-1776 is a version of the DeepSeek R1 model, fine-tuned to provide unfiltered, unbiased factual information."
},
"solar-mini": {
"description": "Solar Mini is a compact LLM that outperforms GPT-3.5, featuring strong multilingual capabilities and supporting English and Korean, providing an efficient and compact solution."
},
"solar-mini-ja": {
"description": "Solar Mini (Ja) extends the capabilities of Solar Mini, focusing on Japanese while maintaining efficiency and excellent performance in English and Korean usage."
},
"solar-pro": {
"description": "Solar Pro is a highly intelligent LLM launched by Upstage, focusing on single-GPU instruction-following capabilities, with an IFEval score above 80. Currently supports English, with a formal version planned for release in November 2024, which will expand language support and context length."
},
"sonar": {
"description": "A lightweight search product based on contextual search, faster and cheaper than Sonar Pro."
},
"sonar-deep-research": {
"description": "Deep Research conducts comprehensive expert-level studies and synthesizes them into accessible, actionable reports."
},
"sonar-pro": {
"description": "An advanced search product that supports contextual search, advanced queries, and follow-ups."
},
"sonar-reasoning": {
"description": "A new API product powered by the DeepSeek reasoning model."
},
"sonar-reasoning-pro": {
"description": "A new API product powered by the DeepSeek reasoning model."
},
"stable-diffusion-3-medium": {
"description": "The latest text-to-image large model released by Stability AI. This version inherits the advantages of its predecessors and significantly improves image quality, text understanding, and style diversity, enabling more accurate interpretation of complex natural language prompts and generating more precise and diverse images."
},
"stable-diffusion-3.5-large": {
"description": "stable-diffusion-3.5-large is an 800-million-parameter multimodal diffusion transformer (MMDiT) text-to-image generation model, offering excellent image quality and prompt matching. It supports generating high-resolution images up to 1 million pixels and runs efficiently on consumer-grade hardware."
},
"stable-diffusion-3.5-large-turbo": {
"description": "stable-diffusion-3.5-large-turbo is a model based on stable-diffusion-3.5-large that employs adversarial diffusion distillation (ADD) technology, providing faster generation speed."
},
"stable-diffusion-v1.5": {
"description": "stable-diffusion-v1.5 is initialized with weights from the stable-diffusion-v1.2 checkpoint and fine-tuned for 595k steps at 512x512 resolution on \"laion-aesthetics v2 5+\", reducing text conditioning by 10% to improve classifier-free guidance sampling."
},
"stable-diffusion-xl": {
"description": "stable-diffusion-xl features major improvements over v1.5 and achieves results comparable to the current open-source text-to-image SOTA model Midjourney. Key enhancements include a UNet backbone three times larger than before, an added refinement module to improve image quality, and more efficient training techniques."
},
"stable-diffusion-xl-base-1.0": {
"description": "A text-to-image large model developed and open-sourced by Stability AI, leading the industry in creative image generation capabilities. It has excellent instruction understanding and supports inverse prompt definitions for precise content generation."
},
"step-1-128k": {
"description": "Balances performance and cost, suitable for general scenarios."
},
"step-1-256k": {
"description": "Equipped with ultra-long context processing capabilities, especially suitable for long document analysis."
},
"step-1-32k": {
"description": "Supports medium-length dialogues, applicable to various application scenarios."
},
"step-1-8k": {
"description": "Small model, suitable for lightweight tasks."
},
"step-1-flash": {
"description": "High-speed model, suitable for real-time dialogues."
},
"step-1.5v-mini": {
"description": "This model has powerful video understanding capabilities."
},
"step-1o-turbo-vision": {
"description": "This model has powerful image understanding capabilities, outperforming 1o in mathematical and coding fields. The model is smaller than 1o and has a faster output speed."
},
"step-1o-vision-32k": {
"description": "This model possesses powerful image understanding capabilities. Compared to the step-1v series models, it offers enhanced visual performance."
},
"step-1v-32k": {
"description": "Supports visual input, enhancing multimodal interaction experiences."
},
"step-1v-8k": {
"description": "A small visual model suitable for basic text and image tasks."
},
"step-1x-edit": {
"description": "This model focuses on image editing tasks, capable of modifying and enhancing images based on user-provided images and text descriptions. It supports multiple input formats, including text descriptions and example images. The model understands user intent and generates image edits that meet the requirements."
},
"step-1x-medium": {
"description": "This model has strong image generation capabilities, supporting text descriptions as input. It natively supports Chinese, better understanding and processing Chinese text descriptions, accurately capturing semantic information and converting it into image features for more precise image generation. The model can generate high-resolution, high-quality images and has some style transfer capabilities."
},
"step-2-16k": {
"description": "Supports large-scale context interactions, suitable for complex dialogue scenarios."
},
"step-2-16k-exp": {
"description": "An experimental version of the step-2 model, featuring the latest capabilities and rolling updates. Not recommended for use in formal production environments."
},
"step-2-mini": {
"description": "A high-speed large model based on the next-generation self-developed Attention architecture MFA, achieving results similar to step-1 at a very low cost, while maintaining higher throughput and faster response times. It is capable of handling general tasks and has specialized skills in coding."
},
"step-2x-large": {
"description": "Step Star next-generation image generation model, focusing on image generation tasks. It can generate high-quality images based on user-provided text descriptions. The new model produces more realistic textures and stronger Chinese and English text generation capabilities."
},
"step-3": {
"description": "This model has powerful visual perception and advanced reasoning capabilities. It can accurately handle complex cross-domain knowledge comprehension, perform integrated analysis of mathematical and visual information, and solve a wide range of visual analysis tasks encountered in everyday life."
},
"step-r1-v-mini": {
"description": "This model is a powerful reasoning model with strong image understanding capabilities, able to process both image and text information, generating text content after deep reasoning. It excels in visual reasoning while also possessing first-tier capabilities in mathematics, coding, and text reasoning. The context length is 100k."
},
"step3": {
"description": "Step3 is a multimodal model developed by StepStar, offering advanced visual understanding capabilities."
},
"stepfun-ai/step3": {
"description": "Step3 is a cutting-edge multimodal reasoning model released by StepFun. It is built on a mixture-of-experts (MoE) architecture with 321B total parameters and 38B active parameters. The model adopts an end-to-end design to minimize decoding cost while delivering top-tier performance in visual-language reasoning. Through the combined design of Multi-Matrix Factorized Attention (MFA) and Attention-FFN Decoupling (AFD), Step3 maintains exceptional efficiency on both high-end and low-end accelerators. During pretraining, Step3 processed over 20 trillion text tokens and 4 trillion image-text mixed tokens, covering more than a dozen languages. The model achieves leading performance among open-source models across benchmarks in mathematics, code, and multimodal tasks."
},
"taichu_llm": {
"description": "The ZD Taichu language model possesses strong language understanding capabilities and excels in text creation, knowledge Q&A, code programming, mathematical calculations, logical reasoning, sentiment analysis, and text summarization. It innovatively combines large-scale pre-training with rich knowledge from multiple sources, continuously refining algorithmic techniques and absorbing new knowledge in vocabulary, structure, grammar, and semantics from vast text data, resulting in an evolving model performance. It provides users with more convenient information and services, as well as a more intelligent experience."
},
"taichu_o1": {
"description": "taichu_o1 is a next-generation reasoning model that achieves human-like thinking chains through multimodal interaction and reinforcement learning, supporting complex decision-making scenarios while maintaining high-precision outputs and demonstrating model reasoning pathways, suitable for strategy analysis and deep thinking."
},
"taichu_vl": {
"description": "Integrates capabilities in image understanding, knowledge transfer, and logical attribution, excelling in the field of image-text question answering."
},
"tencent/Hunyuan-A13B-Instruct": {
"description": "Hunyuan-A13B-Instruct has 80 billion parameters, with 13 billion activated parameters matching the performance of larger models. It supports hybrid reasoning with 'fast thinking/slow thinking'; offers stable long-text comprehension; validated by BFCL-v3 and τ-Bench, demonstrating leading agent capabilities; integrates GQA and multiple quantization formats for efficient inference."
},
"tencent/Hunyuan-MT-7B": {
"description": "The Hunyuan Translation Model consists of the Hunyuan-MT-7B translation model and the integrated Hunyuan-MT-Chimera model. Hunyuan-MT-7B is a lightweight translation model with 7 billion parameters, designed to translate source text into target languages. It supports translation across 33 languages and 5 Chinese minority languages. In the WMT25 international machine translation competition, Hunyuan-MT-7B ranked first in 30 out of 31 language categories it participated in, showcasing its exceptional translation capabilities. Tencent's Hunyuan team has developed a comprehensive training paradigm for translation, encompassing pretraining, supervised fine-tuning, translation reinforcement, and integrated enhancement, achieving industry-leading performance among models of similar scale. The model is highly efficient and easy to deploy, making it suitable for a wide range of applications."
},
"text-embedding-3-large": {
"description": "The most powerful vectorization model, suitable for both English and non-English tasks."
},
"text-embedding-3-small": {
"description": "An efficient and cost-effective next-generation embedding model, suitable for knowledge retrieval, RAG applications, and more."
},
"thudm/glm-4-32b": {
"description": "GLM-4-32B-0414 is a 32B bilingual (Chinese-English) open-weight language model optimized for code generation, function calls, and agent-based tasks. It has been pre-trained on 15T of high-quality and re-reasoning data and further refined using human preference alignment, rejection sampling, and reinforcement learning. The model excels in complex reasoning, artifact generation, and structured output tasks, achieving performance comparable to GPT-4o and DeepSeek-V3-0324 in multiple benchmark tests."
},
"thudm/glm-4-32b:free": {
"description": "GLM-4-32B-0414 is a 32B bilingual (Chinese-English) open-weight language model optimized for code generation, function calls, and agent-based tasks. It has been pre-trained on 15T of high-quality and re-reasoning data and further refined using human preference alignment, rejection sampling, and reinforcement learning. The model excels in complex reasoning, artifact generation, and structured output tasks, achieving performance comparable to GPT-4o and DeepSeek-V3-0324 in multiple benchmark tests."
},
"thudm/glm-4-9b-chat": {
"description": "The open-source version of the latest generation pre-trained model from the GLM-4 series released by Zhiyuan AI."
},
"thudm/glm-z1-32b": {
"description": "GLM-Z1-32B-0414 is an enhanced reasoning variant of GLM-4-32B, built for deep mathematics, logic, and code-oriented problem solving. It applies extended reinforcement learning (task-specific and based on general pairwise preferences) to improve performance on complex multi-step tasks. Compared to the base GLM-4-32B model, Z1 significantly enhances capabilities in structured reasoning and formal domains.\n\nThis model supports enforcing 'thinking' steps through prompt engineering and provides improved coherence for long-format outputs. It is optimized for agent workflows and supports long context (via YaRN), JSON tool calls, and fine-grained sampling configurations for stable reasoning. It is ideal for use cases requiring thoughtful, multi-step reasoning or formal derivation."
},
"thudm/glm-z1-rumination-32b": {
"description": "THUDM: GLM Z1 Rumination 32B is a deep reasoning model with 32 billion parameters in the GLM-4-Z1 series, optimized for complex, open-ended tasks that require prolonged thought. It builds upon glm-4-32b-0414, adding additional reinforcement learning stages and multi-stage alignment strategies, introducing a 'rumination' capability designed to simulate extended cognitive processing. This includes iterative reasoning, multi-hop analysis, and tool-enhanced workflows such as search, retrieval, and citation-aware synthesis.\n\nThe model excels in research-style writing, comparative analysis, and complex question answering. It supports function calls for search and navigation primitives (`search`, `click`, `open`, `finish`), allowing it to be used in agent-based pipelines. The rumination behavior is shaped by rule-based rewards and a delayed decision-making mechanism, controlled by multi-round cycles, benchmarked against deep research frameworks like OpenAI's internal alignment stack. This variant is suitable for scenarios requiring depth over speed."
},
"tngtech/deepseek-r1t-chimera:free": {
"description": "DeepSeek-R1T-Chimera is created by merging DeepSeek-R1 and DeepSeek-V3 (0324), combining the reasoning capabilities of R1 with the token efficiency improvements of V3. It is based on the DeepSeek-MoE Transformer architecture and optimized for general text generation tasks.\n\nThis model merges the pre-trained weights of the two source models to balance performance in reasoning, efficiency, and instruction following tasks. It is released under the MIT license, intended for research and commercial use."
},
"togethercomputer/StripedHyena-Nous-7B": {
"description": "StripedHyena Nous (7B) provides enhanced computational capabilities through efficient strategies and model architecture."
},
"tts-1": {
"description": "The latest text-to-speech model, optimized for speed in real-time scenarios."
},
"tts-1-hd": {
"description": "The latest text-to-speech model, optimized for quality."
},
"upstage/SOLAR-10.7B-Instruct-v1.0": {
"description": "Upstage SOLAR Instruct v1 (11B) is suitable for refined instruction tasks, offering excellent language processing capabilities."
},
"us.anthropic.claude-3-5-sonnet-20241022-v2:0": {
"description": "Claude 3.5 Sonnet raises the industry standard, outperforming competitor models and Claude 3 Opus, excelling in a wide range of evaluations while maintaining the speed and cost of our mid-tier models."
},
"us.anthropic.claude-3-7-sonnet-20250219-v1:0": {
"description": "Claude 3.7 Sonnet is Anthropic's fastest next-generation model. Compared to Claude 3 Haiku, Claude 3.7 Sonnet shows improvements across various skills and surpasses the previous generation's largest model, Claude 3 Opus, in many intelligence benchmark tests."
},
"v0-1.0-md": {
"description": "The v0-1.0-md model is a legacy model served through the v0 API."
},
"v0-1.5-lg": {
"description": "The v0-1.5-lg model is suitable for advanced thinking or reasoning tasks."
},
"v0-1.5-md": {
"description": "The v0-1.5-md model is suitable for everyday tasks and user interface (UI) generation."
},
"vercel/v0-1.0-md": {
"description": "Access the model behind v0 to generate, fix, and optimize modern web applications, with framework-specific reasoning and up-to-date knowledge."
},
"vercel/v0-1.5-md": {
"description": "Access the model behind v0 to generate, fix, and optimize modern web applications, with framework-specific reasoning and up-to-date knowledge."
},
"wan2.2-t2i-flash": {
"description": "Wanxiang 2.2 Flash version, the latest model currently available. Fully upgraded in creativity, stability, and realism, with fast generation speed and high cost-effectiveness."
},
"wan2.2-t2i-plus": {
"description": "Wanxiang 2.2 Professional version, the latest model currently available. Fully upgraded in creativity, stability, and realism, generating images with rich details."
},
"wanx-v1": {
"description": "Basic text-to-image model corresponding to Tongyi Wanxiang official website's 1.0 general model."
},
"wanx2.0-t2i-turbo": {
"description": "Specializes in textured portraits, with moderate speed and low cost. Corresponds to Tongyi Wanxiang official website's 2.0 turbo model."
},
"wanx2.1-t2i-plus": {
"description": "Fully upgraded version. Generates images with richer details, slightly slower speed. Corresponds to Tongyi Wanxiang official website's 2.1 professional model."
},
"wanx2.1-t2i-turbo": {
"description": "Fully upgraded version. Fast generation speed, comprehensive effects, and high overall cost-effectiveness. Corresponds to Tongyi Wanxiang official website's 2.1 turbo model."
},
"whisper-1": {
"description": "A general-purpose speech recognition model supporting multilingual speech recognition, speech translation, and language identification."
},
"wizardlm2": {
"description": "WizardLM 2 is a language model provided by Microsoft AI, excelling in complex dialogues, multilingual capabilities, reasoning, and intelligent assistant applications."
},
"wizardlm2:8x22b": {
"description": "WizardLM 2 is a language model provided by Microsoft AI, excelling in complex dialogues, multilingual capabilities, reasoning, and intelligent assistant applications."
},
"x1": {
"description": "The Spark X1 model will undergo further upgrades, achieving results in reasoning, text generation, and language understanding tasks that match OpenAI o1 and DeepSeek R1, building on its leading position in domestic mathematical tasks."
},
"xai/grok-2": {
"description": "Grok 2 is a cutting-edge language model with state-of-the-art reasoning capabilities. It excels in chat, coding, and reasoning, outperforming Claude 3.5 Sonnet and GPT-4-Turbo on the LMSYS leaderboard."
},
"xai/grok-2-vision": {
"description": "Grok 2 Vision model excels at vision-based tasks, delivering state-of-the-art performance in visual math reasoning (MathVista) and document-based question answering (DocVQA). It can process various visual information including documents, charts, graphs, screenshots, and photos."
},
"xai/grok-3": {
"description": "xAI's flagship model, excelling in enterprise use cases such as data extraction, coding, and text summarization. It has deep domain knowledge in finance, healthcare, legal, and scientific fields."
},
"xai/grok-3-fast": {
"description": "xAI's flagship model excelling in enterprise use cases like data extraction, coding, and text summarization. The fast variant is served on faster infrastructure, providing much quicker response times at the cost of higher per-token output expenses."
},
"xai/grok-3-mini": {
"description": "xAI's lightweight model that thinks before responding. Ideal for simple or logic-based tasks that do not require deep domain knowledge. Raw thought traces are accessible."
},
"xai/grok-3-mini-fast": {
"description": "xAI's lightweight model that thinks before responding. Ideal for simple or logic-based tasks that do not require deep domain knowledge. Raw thought traces are accessible. The fast variant is served on faster infrastructure, providing much quicker response times at the cost of higher per-token output expenses."
},
"xai/grok-4": {
"description": "xAI's latest and greatest flagship model, delivering unparalleled performance in natural language, mathematics, and reasoning—an ideal all-rounder."
},
"yi-large": {
"description": "A new trillion-parameter model, providing super strong question-answering and text generation capabilities."
},
"yi-large-fc": {
"description": "Based on the yi-large model, supports and enhances tool invocation capabilities, suitable for various business scenarios requiring agent or workflow construction."
},
"yi-large-preview": {
"description": "Initial version, recommended to use yi-large (new version)."
},
"yi-large-rag": {
"description": "High-level service based on the yi-large super strong model, combining retrieval and generation techniques to provide precise answers and real-time information retrieval services."
},
"yi-large-turbo": {
"description": "Exceptional performance at a high cost-performance ratio. Conducts high-precision tuning based on performance, inference speed, and cost."
},
"yi-lightning": {
"description": "The latest high-performance model, ensuring high-quality output while significantly improving reasoning speed."
},
"yi-lightning-lite": {
"description": "A lightweight version, recommended to use yi-lightning."
},
"yi-medium": {
"description": "Medium-sized model upgraded and fine-tuned, balanced capabilities, and high cost-performance ratio. Deeply optimized instruction-following capabilities."
},
"yi-medium-200k": {
"description": "200K ultra-long context window, providing deep understanding and generation capabilities for long texts."
},
"yi-spark": {
"description": "Small yet powerful, lightweight and fast model. Provides enhanced mathematical computation and coding capabilities."
},
"yi-vision": {
"description": "Model for complex visual tasks, providing high-performance image understanding and analysis capabilities."
},
"yi-vision-v2": {
"description": "A complex visual task model that provides high-performance understanding and analysis capabilities based on multiple images."
},
"zai-org/GLM-4.5": {
"description": "GLM-4.5 is a foundational model designed specifically for agent applications, using a Mixture-of-Experts (MoE) architecture. It is deeply optimized for tool invocation, web browsing, software engineering, and front-end programming, supporting seamless integration with code agents like Claude Code and Roo Code. GLM-4.5 employs a hybrid inference mode, adaptable to complex reasoning and everyday use scenarios."
},
"zai-org/GLM-4.5-Air": {
"description": "GLM-4.5-Air is a foundational model designed specifically for agent applications, using a Mixture-of-Experts (MoE) architecture. It is deeply optimized for tool invocation, web browsing, software engineering, and front-end programming, supporting seamless integration with code agents like Claude Code and Roo Code. GLM-4.5 employs a hybrid inference mode, adaptable to complex reasoning and everyday use scenarios."
},
"zai-org/GLM-4.5V": {
"description": "GLM-4.5V is the latest-generation vision-language model (VLM) released by Zhipu AI. It is built on the flagship text model GLM-4.5-Air, which has 106B total parameters and 12B active parameters, and adopts a Mixture-of-Experts (MoE) architecture to deliver outstanding performance at reduced inference cost. Technically, GLM-4.5V continues the trajectory of GLM-4.1V-Thinking and introduces innovations such as three-dimensional rotary position encoding (3D-RoPE), significantly improving perception and reasoning of three-dimensional spatial relationships. Through optimizations across pretraining, supervised fine-tuning, and reinforcement learning stages, the model can handle a wide range of visual content including images, video, and long documents, and has achieved top-tier performance among comparable open-source models across 41 public multimodal benchmarks. The model also adds a \"Thinking Mode\" toggle that lets users flexibly choose between fast responses and deep reasoning to balance efficiency and effectiveness."
},
"zai-org/GLM-4.6": {
"description": "Compared to GLM-4.5, GLM-4.6 introduces several key improvements. Its context window expands from 128K to 200K tokens, enabling the model to handle more complex agent tasks. The model achieves higher scores on code benchmarks and demonstrates stronger real-world performance in applications such as Claude Code, Cline, Roo Code, and Kilo Code, including improvements in generating visually refined front-end pages. GLM-4.6 shows significant enhancements in inference performance and supports tool usage during inference, resulting in stronger overall capabilities. It excels in tool utilization and search-based agents and integrates more effectively into agent frameworks. In writing, the model better aligns with human preferences in style and readability and performs more naturally in role-playing scenarios."
},
"zai/glm-4.5": {
"description": "The GLM-4.5 series models are foundational models specifically designed for agents. The flagship GLM-4.5 integrates 355 billion total parameters (32 billion active), unifying reasoning, coding, and agent capabilities to address complex application needs. As a hybrid reasoning system, it offers dual operating modes."
},
"zai/glm-4.5-air": {
"description": "GLM-4.5 and GLM-4.5-Air are our latest flagship models, specifically designed as foundational models for agent applications. Both utilize a Mixture of Experts (MoE) architecture. GLM-4.5 has 355 billion total parameters with 32 billion active per forward pass, while GLM-4.5-Air features a streamlined design with 106 billion total parameters and 12 billion active."
},
"zai/glm-4.5v": {
"description": "GLM-4.5V is built on the GLM-4.5-Air foundational model, inheriting the proven techniques of GLM-4.1V-Thinking while achieving efficient scaling through a powerful 106 billion parameter MoE architecture."
}
}