Kết quả chạy thử mô hình DeepSeek-R1 671B, bản lượng tử hóa động 2.51-bit (UD-Q2_K_XL), bằng llama.cpp với các tham số sau:
--model /home/achilles/projects/download-huggingface/DeepSeek-R1-GGUF/DeepSeek-R1-UD-Q2_K_XL/DeepSeek-R1-UD-Q2_K_XL-00001-of-00005.gguf \
--cache-type-k q4_0 \
--threads 80 -no-cnv --prio 2 \
--n-gpu-layers 7 \
--temp 0.6 \
--ctx-size 8192 \
--seed 3407 \
--prompt "<|User|>Create a Flappy Bird game in Python.<|Assistant|>"
llama_perf_sampler_print: sampling time = 312.60 ms / 3589 runs ( 0.09 ms per token, 11481.27 tokens per second)
llama_perf_context_print: load time = 130139.24 ms
llama_perf_context_print: prompt eval time = 450.40 ms / 12 tokens ( 37.53 ms per token, 26.64 tokens per second)
llama_perf_context_print: eval time = 472243.30 ms / 3576 runs ( 132.06 ms per token, 7.57 tokens per second)
llama_perf_context_print: total time = 473408.47 ms / 3588 tokens
(Đang cập nhật thêm kết quả.)