Announcement

👇Official Account👇

图片

Welcome to join the group & private message

Article first/tail QR code

Skip to content

大模型应用开发指南

大语言模型(LLM)正在改变软件开发的方式。本文将系统介绍大模型应用开发的核心技术和最佳实践。

大模型基础

主流模型对比

| 模型 | 提供商 | 上下文长度 | 特点 |
| --- | --- | --- | --- |
| GPT-4 | OpenAI | 128K | 强大的推理能力 |
| GPT-3.5 | OpenAI | 16K | 性价比高 |
| Claude 3 | Anthropic | 200K | 长文本处理 |
| Gemini Pro | Google | 1M | 多模态能力 |
| Llama 3 | Meta | 8K | 开源可部署 |
| Qwen | 阿里云 | 128K | 中文优化 |

API 调用基础

python
import openai

# OpenAI API — the key is passed explicitly here; in production read it
# from the environment instead of hard-coding it.
client = openai.OpenAI(api_key="your-api-key")

# Conversation: one role-setting system message plus one user question.
chat_messages = [
    {"role": "system", "content": "你是一个专业的编程助手。"},
    {"role": "user", "content": "解释什么是递归。"},
]

response = client.chat.completions.create(
    model="gpt-4",
    messages=chat_messages,
    temperature=0.7,  # moderate randomness
    max_tokens=500,   # cap on reply length
)

print(response.choices[0].message.content)

Prompt Engineering

基础技巧

python
# 1. Clear instruction: state the task exactly and leave a labelled
#    slot ("中文:") for the model to fill in.
prompt = """
请将以下英文翻译成中文:
英文:Hello, how are you?
中文:
"""

# 2. Provide context: assign a role, state the task, then give the input.
prompt = """
背景:你是一个专业的 Python 开发者。
任务:解释以下代码的作用。
代码:
def fibonacci(n):
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)
"""

# 3. Few-shot prompting: two worked examples fix the output format before
#    the real query. (Each assignment overwrites `prompt` — these are
#    three independent demos, not a pipeline.)
prompt = """
将自然语言转换为 SQL 查询:

示例 1:
查询:显示所有年龄大于 25 岁的用户
SQL:SELECT * FROM users WHERE age > 25;

示例 2:
查询:统计每个部门的员工数量
SQL:SELECT department, COUNT(*) FROM employees GROUP BY department;

现在转换:
查询:找出订单金额最高的前 10 个客户
SQL:
"""

高级技巧

Chain of Thought

python
# Chain-of-Thought: spell out the reasoning steps the model should follow
# before giving the final answer (classic chickens-and-rabbits problem).
prompt = """
问题:一个农场有鸡和兔,头共 35 个,脚共 94 只。鸡兔各几只?

请按以下步骤思考:
1. 设鸡有 x 只,兔有 y 只
2. 根据头的数量列出方程
3. 根据脚的数量列出方程
4. 解方程组
5. 验证答案

详细解答:
"""

ReAct 模式

python
# ReAct-style prompt: declares the available tools and enforces the
# Thought → Action → Observation loop format.
prompt = """
你可以使用以下工具:
- search(query): 搜索信息
- calculator(expression): 计算表达式

问题:2023 年诺贝尔物理学奖得主是谁?他/她的主要贡献是什么?

请按以下格式回答:
思考:我需要搜索 2023 年诺贝尔物理学奖的信息
行动:search("2023 年诺贝尔物理学奖得主")
观察:[搜索结果]
思考:...
"""

RAG 系统开发

架构设计

┌──────────┐    ┌──────────┐    ┌──────────┐    ┌──────────┐
│  文档    │───▶│  分块    │───▶│ Embedding│───▶│ 向量库   │
│  数据    │    │  处理    │    │   模型   │    │          │
└──────────┘    └──────────┘    └──────────┘    └────┬─────┘

┌──────────┐    ┌──────────┐    ┌──────────┐         │
│   用户   │───▶│  Query   │───▶│  相似度  │◀────────┘
│   问题   │    │ Embedding│    │  搜索    │
└──────────┘    └──────────┘    └────┬─────┘


                              ┌──────────────┐
                              │  LLM 生成    │
                              │ (上下文+问题)│
                              └──────────────┘

完整实现

python
from langchain import OpenAIEmbeddings, FAISS, OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
import os

# 1. 文档加载和分块
def load_and_split_documents(file_path):
    """Read a UTF-8 text file and split it into overlapping character chunks."""
    with open(file_path, 'r', encoding='utf-8') as fh:
        raw_text = fh.read()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,      # max characters per chunk
        chunk_overlap=200,    # characters shared between adjacent chunks
        length_function=len,  # measure size in characters
    )
    return splitter.split_text(raw_text)

# 2. 创建向量库
def create_vector_store(chunks):
    """Embed the text chunks and index them in an in-memory FAISS store."""
    embedding_model = OpenAIEmbeddings(
        model="text-embedding-3-small",
        api_key=os.getenv("OPENAI_API_KEY"),
    )
    return FAISS.from_texts(texts=chunks, embedding=embedding_model)

# 3. 构建 RAG 链
def create_rag_chain(vector_store):
    """Wire an LLM to the vector store as a retrieval-QA chain."""
    language_model = OpenAI(
        model="gpt-4",
        temperature=0.7,
        api_key=os.getenv("OPENAI_API_KEY"),
    )
    # Retrieve the 3 most similar chunks per query.
    retriever = vector_store.as_retriever(search_kwargs={"k": 3})
    return RetrievalQA.from_chain_type(
        llm=language_model,
        chain_type="stuff",            # concatenate retrieved chunks into one prompt
        retriever=retriever,
        return_source_documents=True,  # expose which chunks were used
    )

# 4. Usage: build the index once, then query it.
chunks = load_and_split_documents("document.txt")
vector_store = create_vector_store(chunks)
qa_chain = create_rag_chain(vector_store)

result = qa_chain({"query": "文档的主要内容是什么?"})
print(result["result"])
# Show the first 100 characters of each retrieved source chunk.
print("来源:", [doc.page_content[:100] for doc in result["source_documents"]])

高级 RAG 技术

混合检索

python
from langchain.retrievers import BM25Retriever, EnsembleRetriever

# Sparse keyword retriever (BM25) over the same chunks; top-3 results.
bm25_retriever = BM25Retriever.from_texts(chunks)
bm25_retriever.k = 3

# Dense vector retriever; also top-3.
vector_retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# Hybrid retrieval: merge the ranked lists of both retrievers with equal weight.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, vector_retriever],
    weights=[0.5, 0.5]
)

# Usage
results = ensemble_retriever.get_relevant_documents("查询问题")

重排序

python
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# LLM-based compressor: extracts only the passages relevant to the query
# from each retrieved document. NOTE(review): reuses the `llm` defined earlier.
compressor = LLMChainExtractor.from_llm(llm)

# Wrap the base retriever so documents are compressed before being returned.
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vector_store.as_retriever()
)

# Usage
compressed_docs = compression_retriever.get_relevant_documents("查询问题")

Function Calling

基础用法

python
import json

def get_weather(location, unit="celsius"):
    """Return mock weather data for *location* (a real version would call a weather API)."""
    # NOTE: `unit` is accepted for API-shape parity but unused by this mock.
    report = {"temperature": 25, "condition": "sunny", "location": location}
    return report

def calculate(expression):
    """Evaluate a simple arithmetic expression.

    Returns {"result": value} on success, or {"error": "Invalid expression"}
    when evaluation fails for any reason.

    WARNING: eval() on untrusted text is still risky even with builtins
    stripped; a production system should use a real expression parser.
    """
    try:
        # Empty __builtins__ so names like __import__/open are unreachable.
        result = eval(expression, {"__builtins__": {}}, {})
        return {"result": result}
    except Exception:  # narrowed from bare except: keep SystemExit/KeyboardInterrupt alive
        return {"error": "Invalid expression"}

# Tool schemas in the OpenAI function-calling format: each entry gives the
# model a name, a description, and a JSON-Schema spec of the parameters.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "获取指定位置的天气信息",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "城市名称,如北京、上海"
                    },
                    "unit": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "温度单位"
                    }
                },
                # Only location is mandatory; unit falls back to the function default.
                "required": ["location"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "calculate",
            "description": "计算数学表达式",
            "parameters": {
                "type": "object",
                "properties": {
                    "expression": {
                        "type": "string",
                        "description": "数学表达式,如 2 + 2"
                    }
                },
                "required": ["expression"]
            }
        }
    }
]

# Ask the model; with tool_choice="auto" it may answer directly or
# request one or more of the declared tools.
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "user", "content": "北京今天天气怎么样?然后计算 15 * 23"}
    ],
    tools=tools,
    tool_choice="auto"
)

# Dispatch each requested tool call back to the matching local function.
message = response.choices[0].message
if message.tool_calls:
    handlers = {"get_weather": get_weather, "calculate": calculate}
    for tool_call in message.tool_calls:
        function_name = tool_call.function.name
        function_args = json.loads(tool_call.function.arguments)

        if function_name in handlers:
            result = handlers[function_name](**function_args)

        print(f"函数 {function_name} 返回:{result}")

Agent 开发

ReAct Agent

python
from langchain.agents import Tool, AgentExecutor, create_react_agent
from langchain import OpenAI
from langchain.prompts import PromptTemplate

# Tool definitions: each Tool wraps a callable plus a natural-language
# description the agent reads to decide when to invoke it.
tools = [
    Tool(
        name="Search",
        func=lambda x: f"搜索 '{x}' 的结果",  # stub: returns a canned string
        description="用于搜索信息"
    ),
    Tool(
        name="Calculator",
        func=lambda x: str(eval(x)),  # WARNING: eval on model-chosen input is unsafe — demo only
        description="用于计算数学表达式"
    )
]

# Classic ReAct prompt: the model alternates Thought/Action/Observation
# lines; LangChain fills {tools}, {tool_names}, {input} and {agent_scratchpad}.
template = """Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}"""

prompt = PromptTemplate.from_template(template)

# Build the agent: deterministic LLM (temperature=0), the tools above,
# and the ReAct prompt; AgentExecutor drives the think/act loop.
llm = OpenAI(temperature=0)
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

# Run a two-step task (calculation, then search).
result = agent_executor.invoke({"input": "计算 25 的平方,然后搜索 Python"})
print(result["output"])

自定义 Agent

python
from typing import List, Dict, Any
import openai

class SimpleAgent:
    """Minimal tool-using agent: loops LLM calls, executing any requested tools."""

    def __init__(self, tools: List[Dict], model: str = "gpt-4"):
        # Index the tool schemas by function name for quick dispatch.
        self.tools = {t["function"]["name"]: t for t in tools}
        self.model = model
        self.client = openai.OpenAI()

    def execute_tool(self, tool_name: str, arguments: Dict) -> Any:
        """Run the named tool (stub — a real agent would call the implementation)."""
        return f"Executed {tool_name} with {arguments}"

    def run(self, query: str, max_iterations: int = 5) -> str:
        """Drive the chat loop until the model answers or the iteration cap is hit."""
        messages = [
            {"role": "system", "content": "你是一个智能助手,可以使用工具来解决问题。"},
            {"role": "user", "content": query}
        ]

        for _ in range(max_iterations):
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                tools=list(self.tools.values()),
                tool_choice="auto"
            )
            reply = response.choices[0].message

            # A reply without tool calls is the final answer.
            if not reply.tool_calls:
                return reply.content

            # Otherwise record the assistant turn, then answer every tool call
            # with a matching "tool" message so the next round sees the results.
            messages.append(reply)
            for call in reply.tool_calls:
                outcome = self.execute_tool(
                    call.function.name,
                    json.loads(call.function.arguments),
                )
                messages.append({
                    "role": "tool",
                    "tool_call_id": call.id,
                    "content": str(outcome)
                })

        return "达到最大迭代次数"

# Usage: build the agent from the tool schemas defined earlier.
# NOTE(review): run() performs real API calls through self.client.
agent = SimpleAgent(tools=tools)
result = agent.run("北京天气怎么样?")
print(result)

生产部署

性能优化

python
# 1. Caching: memoize embedding lookups with a bounded LRU (1000 entries),
#    so repeated queries for the same text skip the API round-trip.
#    NOTE(review): assumes a module-level `embeddings` object exists.
from functools import lru_cache

@lru_cache(maxsize=1000)
def cached_embedding(text: str):
    return embeddings.embed_query(text)

# 2. Async requests: must use the async client. Awaiting the *sync*
#    client's .create() was a bug — it returns a completed response
#    object, not an awaitable, so `await` raises TypeError.
import asyncio

async def async_llm_call(messages):
    """Issue one chat completion without blocking the event loop."""
    # NOTE: in real code create one AsyncOpenAI client and reuse it.
    async_client = openai.AsyncOpenAI()
    return await async_client.chat.completions.create(
        model="gpt-4",
        messages=messages
    )

# 3. Streaming: stream=True makes the API yield incremental chunks, so
#    tokens can be shown to the user as they are generated.
response = client.chat.completions.create(
    model="gpt-4",
    messages=messages,
    stream=True
)

for chunk in response:
    # delta.content can be None (e.g. role-only chunks); skip those.
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")

安全考虑

python
# 1. Input validation
def sanitize_input(user_input: str) -> str:
    """Strip a blocklist of script-injection markers from user input.

    Matching is case-insensitive and repeats until the string is stable,
    so variants like "<SCRIPT>" and nested forms ("<scr<script>ipt>")
    are removed too — the original single-pass, case-sensitive replace
    missed both.

    NOTE(review): a blocklist is not real XSS protection; escape output
    with html.escape() at render time instead.
    """
    import re  # local import: this snippet has no shared import section

    dangerous = ["<script>", "javascript:", "onerror="]
    pattern = re.compile("|".join(re.escape(d) for d in dangerous), re.IGNORECASE)

    previous = None
    while previous != user_input:  # repeat until no marker remains
        previous = user_input
        user_input = pattern.sub("", user_input)
    return user_input

# 2. Output filtering
def filter_output(output: str) -> str:
    """Redact sensitive substrings from model output.

    Redacts 16-digit card-like numbers and password assignments
    (``password=...`` / ``password: ...``), case-insensitively.
    """
    import re  # fix: the original snippet used `re` without importing it anywhere

    sensitive_patterns = [r"\b\d{16}\b", r"password[=:]\s*\S+"]
    for pattern in sensitive_patterns:
        output = re.sub(pattern, "[REDACTED]", output, flags=re.IGNORECASE)
    return output

# 3. Rate limiting: third-party `ratelimit` decorators — allow at most
#    100 calls per rolling 60 s window; sleep_and_retry blocks the caller
#    instead of raising when the limit is hit.
from ratelimit import limits, sleep_and_retry

@sleep_and_retry
@limits(calls=100, period=60)
def rate_limited_call():
    # `...` is a placeholder — fill in real model/messages arguments.
    return client.chat.completions.create(...)

总结

大模型应用开发涉及多个技术领域,从 Prompt Engineering 到 RAG、Function Calling 和 Agent,每个环节都需要深入理解和实践。


参考资源:

上次更新于: