Published: 2025-10-16
Step 1: prepare the environment and download the model.

```bash
# Install Python, virtual environment tooling, and build essentials
sudo apt update && sudo apt install -y python3 python3-venv python3-pip git gcc

# Create and activate a virtual environment (activation is the same on Ubuntu/Debian and CentOS)
python3 -m venv phi3-env
source phi3-env/bin/activate

# Upgrade pip
pip install --upgrade pip

# Install PyTorch (CPU build, suited to low-spec machines)
pip3 install torch==2.1.0+cpu torchvision==0.16.0+cpu torchaudio==2.1.0+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html

# Install the model's runtime dependencies (flask provides the API service)
pip install transformers==4.38.2 accelerate==0.30.1 sentencepiece==0.1.99 flask==2.3.3

# Install the Hugging Face CLI (optional, speeds up downloads)
pip install "huggingface-hub[cli]"

# Log in to Hugging Face (register an account and create an access token at https://huggingface.co/settings/tokens)
huggingface-cli login

# Download the model weights (the 4-bit quantization applied at load time below
# brings the memory footprint down to roughly 4 GB, a good fit for 8 GB of RAM)
huggingface-cli download microsoft/Phi-3-mini-4k-instruct --local-dir phi3-model --local-dir-use-symlinks False --revision main
```

Step 2: save the following as `phi3_server.py`. It loads the model with 4-bit quantization and exposes a chat endpoint over HTTP.

```python
import torch
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

app = Flask(__name__)

# Quantization settings (the key to keeping memory usage low).
# Note: bitsandbytes 4-bit quantization generally requires a CUDA GPU;
# on a CPU-only PyTorch install, drop quantization_config and load the model as-is.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,               # enable 4-bit quantization
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float32
)

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("./phi3-model")
model = AutoModelForCausalLM.from_pretrained(
    "./phi3-model",
    quantization_config=bnb_config,
    device_map="auto",               # automatic device placement
    trust_remote_code=True
)

# Generation function (the core logic of the private AI assistant)
def generate_response(prompt, max_new_tokens=512, temperature=0.7):
    inputs = tokenizer(
        f"<|user|>\n{prompt}\n<|assistant|>",
        return_tensors="pt",
        truncation=True,
        max_length=4096
    ).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id
    )
    # Decode only the newly generated tokens, not the echoed prompt
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

# API endpoint (callable over HTTP)
@app.route("/api/chat", methods=["POST"])
def chat_api():
    data = request.json
    prompt = data.get("prompt", "Please introduce yourself")
    response = generate_response(prompt)
    return jsonify({"response": response})

if __name__ == "__main__":
    # Start the service on port 5000, reachable from outside the host
    app.run(host="0.0.0.0", port=5000, debug=False)  # keep debug off in production
```

Step 3: start the service and test it.

```bash
# Run the server in the background so it survives the terminal closing
nohup python phi3_server.py > phi3.log 2>&1 &

# Watch the startup log; success looks like "Running on http://0.0.0.0:5000"
tail -f phi3.log

# Test the API with curl
curl -X POST http://YOUR_SERVER_IP:5000/api/chat \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Please write a Python scraper script that fetches a page title"}'
```
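Beyond curl, the endpoint can be called from any HTTP client. A minimal sketch using the `requests` library; the `API_URL` host is a placeholder and the `ask` helper is illustrative, not part of the original setup:

```python
import requests

# Address of the Flask service started above (placeholder host)
API_URL = "http://YOUR_SERVER_IP:5000/api/chat"

def ask(prompt: str, timeout: int = 300) -> str:
    """Send a prompt to the Phi-3 API and return the generated text."""
    # Generous timeout: CPU inference on a low-spec box can take minutes
    resp = requests.post(API_URL, json={"prompt": prompt}, timeout=timeout)
    resp.raise_for_status()  # surface HTTP errors instead of failing silently
    return resp.json()["response"]

if __name__ == "__main__":
    print(ask("Summarize what you can help me with, in one paragraph."))
```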
If memory is tight, add a 4 GB swap file as a buffer, and serve the app with gunicorn instead of Flask's built-in development server:

```bash
# Create and enable a 4 GB swap file
sudo fallocate -l 4G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile && sudo swapon /swapfile

# Serve the app with gunicorn
pip install gunicorn
gunicorn -w 2 -b 0.0.0.0:5000 phi3_server:app  # 2 workers to match a 2-core CPU
```
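The same settings can live in a `gunicorn.conf.py` next to `phi3_server.py`, shrinking the launch command to `gunicorn phi3_server:app`. A sketch under the assumptions of this tutorial (2 cores, slow CPU inference); the values are illustrative, and `preload_app` matters here because each worker process would otherwise load its own copy of the model:

```python
# gunicorn.conf.py -- read automatically when gunicorn starts in this directory
bind = "0.0.0.0:5000"   # same address/port as the Flask dev server above
workers = 2             # match the 2-core CPU
timeout = 300           # CPU inference is slow; avoid killing long generations
preload_app = True      # load the model once in the master process;
                        # forked workers then share the weights copy-on-write
```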