LG 엑사원 설치 방법 및 테스트

컴퓨터

LG 엑사원 설치 방법 및 테스트

Zyss 2025. 7. 23. 12:04

https://www.python.org/downloads/windows/

Python Releases for Windows

The official home of the Python Programming Language

www.python.org

파이썬 설치 체크 항목
Add python.exe to PATH
Use admin privileges when installing py.exe

재부팅

파이썬 버전 확인
python --version
pip --version

https://huggingface.co/LGAI-EXAONE

LGAI-EXAONE (LG AI Research)

LG AI EXAONE Welcome to the official HuggingFace organization of EXAONE, which refers to the family of Large Language Models (LLMs) and Large Multimodal Models (LMMs) developed by LG AI Research. EXAONE stands for EXpert AI for EveryONE, a vision that LG i

huggingface.co

사이트 가입하고, 이메일 인증 후
프로필 Access Tokens 에서 토큰 발급

허깅페이스 라이브러리 설치
pip install transformers torch accelerate huggingface-hub

허깅페이스 로그인
huggingface-cli login
토큰키 입력
"Add token as git credential? (Y/n)" 질문이 나오면 n 입력

cmd에서 실행
python ex.py [ex.py는 아래 코드로 별도 파일 생성]

모델 파일은 최초 실행 시 자동 다운로드 (누락된 라이브러리는 자동 설치되지 않고 설치 명령어만 안내됨)
설치 및 로딩중 시간 다소 소요

=============
토큰 변경이 있을 때
기존 로그인 정보 삭제
huggingface-cli logout

=======

CUDA 지원 PyTorch 설치 (CUDA 지원 그래픽카드가 있는데도 GPU를 사용하지 않을 때 설치)

pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

https://download.pytorch.org/whl/cu118

download.pytorch.org

======= 4.0용 llama-cpp 설치 =======

아래 명령으로 설치가 안 될 때는 Microsoft C++ Build Tools에서 "C++를 사용한 데스크톱 개발" 워크로드를 먼저 설치
pip install llama-cpp-python --force-reinstall --no-deps --index-url https://abetlen.github.io/llama-cpp-python/whl/cu121

Microsoft C++ Build Tools

https://visualstudio.microsoft.com/ko/visual-cpp-build-tools/

Microsoft C++ Build Tools - Visual Studio

visualstudio.microsoft.com

재부팅

cmd창에서 llama-cpp 설치

pip uninstall llama-cpp-python -y
pip install llama-cpp-python --pre --no-cache-dir

4.0 버전 테스트중

ex.py 파일 // cpu, gpu 사용 버전

import os
import sys

# ===========================================
# Engine and model selection
# ===========================================

# Inference engine: keep exactly one of the ENGINE lines active.
ENGINE = "transformers"  # EXAONE 3.5 through Hugging Face Transformers
#ENGINE = "llama_cpp"   # EXAONE 4.0 through llama.cpp

if ENGINE == "transformers":
    # EXAONE 3.5 checkpoints: keep exactly one MODEL_NAME line active.
    MODEL_NAME = "LGAI-EXAONE/EXAONE-3.5-2.4B-Instruct"   # fast and light (recommended)
    # MODEL_NAME = "LGAI-EXAONE/EXAONE-3.5-7.8B-Instruct"  # balanced
    # MODEL_NAME = "LGAI-EXAONE/EXAONE-3.5-32B-Instruct"   # best quality (slow)

elif ENGINE == "llama_cpp":
    # EXAONE 4.0 GGUF repositories: keep exactly one MODEL_NAME line active.
    MODEL_NAME = "LGAI-EXAONE/EXAONE-4.0-1.2B-GGUF"   # fast, newest (recommended)
    # MODEL_NAME = "LGAI-EXAONE/EXAONE-4.0-32B-GGUF"    # best quality, newest (slow)

else:
    # Unknown engine: still bind MODEL_NAME so that code printing it does not
    # fail with NameError before the invalid-engine message can be shown.
    MODEL_NAME = None

# ===========================================
# 🚀 엔진별 실행 코드
# ===========================================

def check_dependencies():
    """Check that the libraries required by the selected engine can be imported.

    Each required module is imported on its own, so only the libraries that
    actually fail to import are reported as missing. When something is
    missing, the list and the suggested pip commands are printed.

    Returns:
        bool: True when every required library imports (also True when ENGINE
        is not a known engine, in which case nothing is checked); False when
        at least one library is missing.
    """
    import importlib

    # (importable module name, pip package name) pairs per engine.
    required = {
        "transformers": [("torch", "torch"), ("transformers", "transformers")],
        "llama_cpp": [("llama_cpp", "llama-cpp-python")],
    }
    confirmations = {
        "transformers": "✅ Transformers 라이브러리 확인됨",
        "llama_cpp": "✅ llama-cpp-python 라이브러리 확인됨",
    }

    missing_libs = []
    for module_name, package_name in required.get(ENGINE, []):
        try:
            importlib.import_module(module_name)
        except ImportError:
            missing_libs.append(package_name)

    if not missing_libs:
        if ENGINE in confirmations:
            print(confirmations[ENGINE])
        return True

    print(f"❌ 누락된 라이브러리: {', '.join(missing_libs)}")
    print("\n📦 설치 명령어:")

    if ENGINE == "transformers":
        print("pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118")
        print("pip install transformers accelerate")
    elif ENGINE == "llama_cpp":
        print("pip install llama-cpp-python")
        print("# GPU 가속 버전:")
        print("pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121")

    print("\n위 명령어를 실행한 후 다시 시도해주세요!")
    return False

def run_transformers():
    """Run an interactive console chat with EXAONE 3.5 through Transformers.

    Loads the tokenizer and model named by MODEL_NAME (float16 with
    device_map="auto" when CUDA is available, float32 on the CPU otherwise),
    then reads prompts from stdin until an exit command, EOF or Ctrl+C.
    Every prompt is sent as a single-turn conversation; no chat history is
    kept. Errors raised while generating are printed and the loop continues.
    """
    from transformers import AutoTokenizer, AutoModelForCausalLM
    import torch

    exit_commands = ('quit', 'exit', '종료', '나가기')

    print("🤖 엑사원 3.5 (Transformers) 초기화 중...")
    print("GPU 확인 중...")

    # Device selection
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"사용 장치: {device}")

    if device == "cuda":
        print(f"GPU: {torch.cuda.get_device_name(0)}")
        print(f"GPU 메모리: {torch.cuda.get_device_properties(0).total_memory // 1024**3}GB")

    print(f"모델 로딩 중: {MODEL_NAME}")

    # Model loading
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto" if device == "cuda" else None,
    )

    if device == "cpu":
        model = model.to(device)

    print("\n" + "="*60)
    print("🤖 엑사원 3.5와 대화를 시작합니다! (Transformers)")
    print(f"📱 모델: {MODEL_NAME}")
    print("종료하려면 'quit' 또는 'exit'를 입력하세요.")
    print("="*60 + "\n")

    # Chat loop
    while True:
        try:
            user_input = input("👤 당신: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl+C at the prompt: leave the chat cleanly
            # instead of ending with a traceback.
            print()
            print("👋 대화를 종료합니다. 안녕히 가세요!")
            break

        # Strip before comparing so "quit " or " exit" are recognised too.
        if user_input.strip().lower() in exit_commands:
            print("👋 대화를 종료합니다. 안녕히 가세요!")
            break

        if not user_input.strip():
            print("질문을 입력해주세요.")
            continue

        print("🤖 엑사원 3.5: 생각 중...")

        try:
            messages = [{"role": "user", "content": user_input}]
            input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = tokenizer(input_text, return_tensors="pt").to(device)
            prompt_length = inputs["input_ids"].shape[-1]

            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=200,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id,
                )

            # generate() returns the prompt tokens followed by the new ones.
            # Decode only the new tokens: searching the decoded text for
            # input_text does not work, because skip_special_tokens=True
            # drops the template's special tokens that input_text contains.
            new_tokens = outputs[0][prompt_length:]
            response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

            print(f"🤖 엑사원 3.5: {response}")
            print("-" * 50)

        except Exception as e:
            print(f"❌ 오류가 발생했습니다: {e}")
            print("-" * 50)

def run_llama_cpp():
    """Run an interactive console chat with EXAONE 4.0 through llama.cpp.

    Fetches a GGUF file from the Hugging Face repository named by MODEL_NAME
    and loads it with all layers offloaded to the GPU, then reads prompts
    from stdin until an exit command, EOF or Ctrl+C. Every prompt is sent as
    a single-turn conversation; no chat history is kept. A load failure
    prints troubleshooting hints and returns; errors raised while generating
    are printed and the loop continues.
    """
    from llama_cpp import Llama
    import time

    exit_commands = ('quit', 'exit', '종료', '나가기')

    print("🆕 엑사원 4.0 (llama.cpp) 초기화 중...")
    print(f"모델 로딩 중: {MODEL_NAME}")
    print("⏳ 최초 실행시 모델 다운로드로 시간이 걸릴 수 있습니다...")

    # Only the load is guarded here, so that later errors in the chat loop
    # are not misreported as a model-loading failure.
    try:
        llm = Llama.from_pretrained(
            repo_id=MODEL_NAME,
            # The pattern has to match exactly one file in the repository.
            # NOTE(review): assumes the repo publishes several quantizations
            # and that one of them ends in "Q4_K_M.gguf" -- confirm against
            # the repository's file list.
            filename="*Q4_K_M.gguf",
            n_gpu_layers=-1,  # offload every layer to the GPU
            n_ctx=2048,       # context length
            verbose=False,
        )
    except Exception as e:
        print(f"❌ 모델 로딩 실패: {e}")
        print("💡 해결책:")
        print("1. 인터넷 연결 확인")
        print("2. 허깅페이스 로그인 확인: huggingface-cli login")
        print("3. GPU 메모리 부족시 더 작은 모델 선택")
        return

    print("\n" + "="*60)
    print("🆕 엑사원 4.0과 대화를 시작합니다! (llama.cpp)")
    print(f"📱 모델: {MODEL_NAME}")
    print("종료하려면 'quit' 또는 'exit'를 입력하세요.")
    print("="*60 + "\n")

    # Chat loop
    while True:
        try:
            user_input = input("👤 당신: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl+C at the prompt: leave the chat cleanly
            # instead of ending with a traceback.
            print()
            print("👋 대화를 종료합니다. 안녕히 가세요!")
            break

        # Strip before comparing so "quit " or " exit" are recognised too.
        if user_input.strip().lower() in exit_commands:
            print("👋 대화를 종료합니다. 안녕히 가세요!")
            break

        if not user_input.strip():
            print("질문을 입력해주세요.")
            continue

        print("🤖 엑사원 4.0: 생각 중...")

        try:
            start_time = time.time()

            # Let llama-cpp-python build the prompt from the chat template
            # stored in the GGUF rather than hand-writing role markers.
            # NOTE(review): the previous "<|user|>"/"<|end|>" markers do not
            # look like EXAONE's own chat format -- confirm on the model card.
            response = llm.create_chat_completion(
                messages=[{"role": "user", "content": user_input}],
                max_tokens=200,
                temperature=0.7,
            )

            end_time = time.time()

            answer = (response['choices'][0]['message']['content'] or "").strip()

            print(f"🤖 엑사원 4.0: {answer}")
            print(f"⏱️ 응답 시간: {end_time - start_time:.2f}초")
            print("-" * 50)

        except Exception as e:
            print(f"❌ 오류가 발생했습니다: {e}")
            print("-" * 50)

# ===========================================
# 🎯 메인 실행부
# ===========================================

def main():
    """Print the launcher banner, validate the engine, and start the chat.

    The engine is validated before anything else depends on it, so an
    unsupported ENGINE value produces the configuration hint instead of
    failing later. Returns early when the engine is unknown or when
    check_dependencies() reports a missing library.
    """
    # Supported engines and the function that runs each of them.
    runners = {
        "transformers": run_transformers,
        "llama_cpp": run_llama_cpp,
    }

    print("🚀 엑사원 통합 런처")
    print(f"🔧 선택된 엔진: {ENGINE}")

    runner = runners.get(ENGINE)
    if runner is None:
        print("❌ 잘못된 엔진 선택!")
        print("ENGINE을 'transformers' 또는 'llama_cpp'로 설정하세요.")
        return

    print(f"📱 선택된 모델: {MODEL_NAME}")
    print("-" * 60)

    # Library check
    if not check_dependencies():
        return

    runner()


if __name__ == "__main__":
    main()

저작자표시 비영리 변경금지 (새창열림)

'컴퓨터' 카테고리의 다른 글

그린캐드 라이선스 풀림 해결법 (0)	2025.08.20
그린캐드 탐색기 파일 연결 오류 해결 방법 (0)	2025.07.22
AUTOCAD 2015LT 멈춤과 버벅임 증상 완화 (0)	2024.12.24
브라우저 이미지 저장시 .jfif 확장자를 다르게 저장하는 방법 (0)	2024.08.22
TCP / UDP 포트 열기 (0)	2024.07.25

현재글LG 엑사원 설치 방법 및 테스트

나를 위해, 오시는 분을 위해서 끄적거리는 공간.

11-25 18:02

Today :
Yesterday :

블록, 자동차, 파인드라이브ai3, 윈도우10, 중음, 아들, 공장초기화, 홍콩음악, 중음추천, 마리오, AI3, 중국음악, 블럭, 대만음악, 사자성어, ai2, 아틀란, 파인드라이브, 고사이야기, 고사성어,

快乐不快乐