Skip to content

DBUpdater.py실행시 오류를 해결하는데 어려움이 있어요. 도와주세요~ #170

@hawkeye213

Description

@hawkeye213

파이썬 증권데이터분석 책이 아주 잘 설명이 되어 있어 혼자 공부하기에 훌륭한 교재라고 생각되네요. 5장. 일별시세조회하는 부분 등에서 에러가 발생하기 시작하네요. 어떻게 해결해야 할지 며칠째 고민하다가 질문드립니다.

실행시 에러화면입니다.

UserWarning: pandas only support SQLAlchemy connectable(engine/connection) ordatabase string URI or sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider using SQLAlchemy
warnings.warn(
C:\Anaconda3\lib\site-packages\pandas\io\sql.py:761: UserWarning: pandas only support SQLAlchemy connectable(engine/connection) ordatabase string URI or sqlite3 DBAPI2 connectionother DBAPI2 objects are not tested, please consider using SQLAlchemy
warnings.warn(

DBUpdater.py 입력한 내용은

mport pandas as pd
from bs4 import BeautifulSoup
import urllib, pymysql, calendar, time, json
import requests
from urllib.request import urlopen
from datetime import datetime
from threading import Timer
import pymysql

class DBUpdater:
def init(self):
"""생성자: MariaDB 연결 및 종목코드 딕셔너리 생성"""
self.conn = pymysql.connect(host='localhost', user='root',
password=***********', db='INVESTAR', charset='utf8')

    with self.conn.cursor() as curs:
        sql = """
       CREATE TABLE IF NOT EXISTS company_info (
            code VARCHAR(20),
            company VARCHAR(40),
            last_update DATE,
            PRIMARY KEY (code)
        );

        """
        curs.execute(sql)
        sql = """
        CREATE TABLE IF NOT EXISTS daily_price (
            code VARCHAR(20),
            date DATE,
            open BIGINT(20),
            high BIGINT(20),
            low BIGINT(20),
            close BIGINT(20),
            diff BIGINT(20),
            volume BIGINT(20),
            PRIMARY KEY (code, date)
        );

        """
        curs.execute(sql)

    self.conn.commit()

    self.codes = dict()
    self.update_comp_info() 

def __del__(self):
    """소멸자: MariaDB 연결 해제"""
    self.conn.close()

def read_krx_code(self):
    """KRX로부터 상장기업 목록 파일을 읽어와서 데이터프레임으로 반환"""
    url = 'https://kind.krx.co.kr/corpgeneral/corpList.do?method=download&seachType=13'
    krx = pd.read_html(url, header=0)[0]  # 상장법인목록.xls 파일을 read_html()함수로 읽는다.
    krx = krx[['종목코드', '회사명']]      
    krx = krx.rename(columns={'종목코드': 'code', '회사명': 'company'}) 
    krx.code = krx.code.map('{:06d}'.format) 
    return krx


def update_comp_info(self):
    """종목코드를 company_info 테이블에 업데이트 한 후 딕셔너리에 저장"""
    sql = "SELECT * FROM company_info"
    df = pd.read_sql(sql, self.conn)  
    for idx in range(len(df)):
        self.codes[df['code'].values[idx]] = df['company'].values[idx] 

    with self.conn.cursor() as curs:
        sql = "SELECT max(last_update) FROM company_info"
        curs.execute(sql)
        rs = curs.fetchone()  
        today = datetime.today().strftime('%Y-%m-%d')
        if rs[0] == None or rs[0].strftime('%Y-%m-%d') < today:  
            krx = self.read_krx_code()  
            for idx in range(len(krx)):
                code = krx.code.values[idx]
                company = krx.company.values[idx]
                sql = f"REPLACE INTO company_info (code, company, last"\
                    f"_update) VALUES ('{code}', '{company}', '{today}')"
                curs.execute(sql) 
                self.codes[code] = company  
                tmnow = datetime.now().strftime('%Y-%m-%d %H:%M')
                print(f"[{tmnow}] #{idx+1:04d} REPLACE INTO company_info "\
                    f"VALUES ({code}, {company}, {today})")
            self.conn.commit()
            print('')

def read_naver(self, code, company, pages_to_fetch):
    """네이버에서 주식 시세를 읽어서 데이터프레임으로 반환"""
    try:
        url = f"http://finance.naver.com/item/sise_day.nhn?code={code}"
        html = BeautifulSoup(requests.get(url,
            headers={'User-agent': 'Mozilla/5.0'}).text, "lxml")
        pgrr = html.find("td", class_="pgRR")
        if pgrr is None:
            return None
        s = str(pgrr.a["href"]).split('=')
        lastpage = s[-1]
        df = pd.DataFrame()
        pages = min(int(lastpage), pages_to_fetch)
        for page in range(1, pages + 1):
            pg_url = '{}&page={}'.format(url, page)
            df = df.append(pd.read_html(requests.get(pg_url,
                headers={'User-agent': 'Mozilla/5.0'}).text)[0])
            tmnow = datetime.now().strftime('%Y-%m-%d %H:%M')
            print('[{}] {} ({}) : {:04d}/{:04d} pages are downloading...'.
                format(tmnow, company, code, page, pages), end="\r")
        df = df.rename(columns={'날짜':'date','종가':'close','전일비':'diff'
            ,'시가':'open','고가':'high','저가':'low','거래량':'volume'})
        df['date'] = df['date'].replace('.', '-')
        df = df.dropna()
        df[['close', 'diff', 'open', 'high', 'low', 'volume']] = df[['close',
            'diff', 'open', 'high', 'low', 'volume']].astype(int)
        df = df[['date', 'open', 'high', 'low', 'close', 'diff', 'volume']]
    except Exception as e:
        print('Exception occured :', str(e))
        return None
    return df

def replace_into_db(self, df, num, code, company):
    """네이버에서 읽어온 주식 시세를 DB에 REPLACE"""
    with self.conn.cursor() as curs:
        for r in df.itertuples():
            sql = f"REPLACE INTO daily_price VALUES ('{code}', "\
                f"'{r.date}', {r.open}, {r.high}, {r.low}, {r.close}, "\
                f"{r.diff}, {r.volume})"
            curs.execute(sql)
        self.conn.commit()
        print('[{}] #{:04d} {} ({}) : {} rows > REPLACE INTO daily_'\
            'price [OK]'.format(datetime.now().strftime('%Y-%m-%d'\
            ' %H:%M'), num+1, company, code, len(df)))

def update_daily_price(self, pages_to_fetch):
    """KRX 상장법인의 주식 시세를 네이버로부터 읽어서 DB에 업데이트"""
    for idx, code in enumerate(self.codes):
        df = self.read_naver(code, self.codes[code], pages_to_fetch)
        if df is None:
            continue
        self.replace_into_db(df, idx, code, self.codes[code])

def execute_daily(self):
    """실행 즉시 및 매일 오후 다섯시에 daily_price 테이블 업데이트"""
    self.update_comp_info()

    try:
        with open('config.json', 'r') as in_file:
            config = json.load(in_file)
            pages_to_fetch = config['pages_to_fetch']
    except FileNotFoundError:
        with open('config.json', 'w') as out_file:
            pages_to_fetch = 100
            config = {'pages_to_fetch': 1}
            json.dump(config, out_file)
    self.update_daily_price(pages_to_fetch)

    tmnow = datetime.now()
    lastday = calendar.monthrange(tmnow.year, tmnow.month)[1]
    if tmnow.month == 12 and tmnow.day == lastday:
        tmnext = tmnow.replace(year=tmnow.year+1, month=1, day=1,
            hour=17, minute=0, second=0)
    elif tmnow.day == lastday:
        tmnext = tmnow.replace(month=tmnow.month+1, day=1, hour=17,
            minute=0, second=0)
    else:
        tmnext = tmnow.replace(day=tmnow.day+1, hour=17, minute=0,
            second=0)
    tmdiff = tmnext - tmnow
    secs = tmdiff.seconds
    t = Timer(secs, self.execute_daily)
    print("Waiting for next update ({}) ... ".format(tmnext.strftime
        ('%Y-%m-%d %H:%M')))
    t.start()

if name == 'main':
dbu = DBUpdater()
dbu.execute_daily()

확인해본 점검한 내용으로는

  1. pip install pymysql 등 필요한 모듈도 다 설치된 상태입니다.
  2. 마리아 디비를 설치한 상태이고 ch05_03_SelectVersion.py 점검시 아래와 같이 정상 시행됩니다.
    C:\Anaconda3\python.exe C:/Users/user/PycharmProjects/PyStudy/ch05_03_SelectVersion.py
    MariaDB version : ('10.10.2-MariaDB',)

Process finished with exit code 0

  1. INVESTAR 폴더내 Analyzer.py, MarketDB.py, DBUpdater.py 위치하고 있으며,

    Analyzer.py 내용은

import pandas as pd
import pymysql
from datetime import datetime
from datetime import timedelta
import re

class MarketDB:
def init(self):
"""생성자: MariaDB 연결 및 종목코드 딕셔너리 생성"""
self.conn = pymysql.connect(host='localhost', user='root',
password=************', db='INVESTAR', charset='utf8')
self.codes = {}
self.get_comp_info()

def __del__(self):
    """소멸자: MariaDB 연결 해제"""
    self.conn.close()

def get_comp_info(self):
    """company_info 테이블에서 읽어와서 codes에 저장"""
    sql = "SELECT * FROM company_info"
    krx = pd.read_sql(sql, self.conn)
    for idx in range(len(krx)):
        self.codes[krx['code'].values[idx]] = krx['company'].values[idx]

def get_daily_price(self, code, start_date=None, end_date=None):
    """KRX 종목의 일별 시세를 데이터프레임 형태로 반환
        - code       : KRX 종목코드('005930') 또는 상장기업명('삼성전자')
        - start_date : 조회 시작일('2020-01-01'), 미입력 시 1년 전 오늘
        - end_date   : 조회 종료일('2020-12-31'), 미입력 시 오늘 날짜
    """
    if start_date is None:
        one_year_ago = datetime.today() - timedelta(days=365)
        start_date = one_year_ago.strftime('%Y-%m-%d')
        print("start_date is initialized to '{}'".format(start_date))
    else:
        start_lst = re.split('\D+', start_date)
        if start_lst[0] == '':
            start_lst = start_lst[1:]
        start_year = int(start_lst[0])
        start_month = int(start_lst[1])
        start_day = int(start_lst[2])
        if start_year < 1900 or start_year > 2200:
            print(f"ValueError: start_year({start_year:d}) is wrong.")
            return
        if start_month < 1 or start_month > 12:
            print(f"ValueError: start_month({start_month:d}) is wrong.")
            return
        if start_day < 1 or start_day > 31:
            print(f"ValueError: start_day({start_day:d}) is wrong.")
            return
        start_date = f"{start_year:04d}-{start_month:02d}-{start_day:02d}"

    if end_date is None:
        end_date = datetime.today().strftime('%Y-%m-%d')
        print("end_date is initialized to '{}'".format(end_date))
    else:
        end_lst = re.split('\D+', end_date)
        if end_lst[0] == '':
            end_lst = end_lst[1:]
        end_year = int(end_lst[0])
        end_month = int(end_lst[1])
        end_day = int(end_lst[2])
        if end_year < 1800 or end_year > 2200:
            print(f"ValueError: end_year({end_year:d}) is wrong.")
            return
        if end_month < 1 or end_month > 12:
            print(f"ValueError: end_month({end_month:d}) is wrong.")
            return
        if end_day < 1 or end_day > 31:
            print(f"ValueError: end_day({end_day:d}) is wrong.")
            return
        end_date = f"{end_year:04d}-{end_month:02d}-{end_day:02d}"

    codes_keys = list(self.codes.keys())
    codes_values = list(self.codes.values())

    if code in codes_keys:
        pass
    elif code in codes_values:
        idx = codes_values.index(code)
        code = codes_keys[idx]
    else:
        print(f"ValueError: Code({code}) doesn't exist.")
    sql = f"SELECT * FROM daily_price WHERE code = '{code}'" \
          f" and date >= '{start_date}' and date <= '{end_date}'"
    df = pd.read_sql(sql, self.conn)
    df.index = df['date']
    return df

MarketDB.py 내용은

import pandas as pd
import pymysql
from datetime import datetime
import re

class MarketDB:
def init(self):
"""생성자: MariaDB 연결 및 종목코드 딕셔너리 생성"""
self.conn = pymysql.connect(host='localhost', user='root', password='*********', db='INVESTAR', charset='utf8')
self.codes = dict()
self.getCompanyInfo()

def __del__(self):
    """소멸자: MariaDB 연결 해제"""
    self.conn.close()

def get_comp_info(self):
    """company_info 테이블에서 읽어와서 companyData와 codes에 저장"""
    sql = "SELECT * FROM company_info"
    krx = pd.read_sql(sql, self.conn)
    for idx in range(len(krx)):
        self.codes[krx['code'].values[idx]] = krx['company'].values[idx]

def get_DailyPrice(self, code, start_Date=None, end_Date=None):
    """daily_price 테이블에서 읽어와서 데이터프레임으로 반환"""
    if start_date is None:
        one_year_ago = datetime.today() - timedelta(days=365)
        start_date = one_year_ago.strftime('%Y-%m-%d')
        print("start_date is initialized to '{}'".format(start_date))



    sql = "SELECT * FROM daily_price WHERE code = '{}'and date >='{}' and date <= '{}'".format(code, start_Date,end_date)
    df = pd.read_sql(sql, self.conn)
    df.index = df['date']
    return df

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions