Skip to content

Commit

Permalink
support collecting yahoo 1min data
Browse files Browse the repository at this point in the history
  • Loading branch information
zhupr committed Jan 26, 2021
1 parent 3d15169 commit 14be0c1
Show file tree
Hide file tree
Showing 4 changed files with 513 additions and 259 deletions.
2 changes: 1 addition & 1 deletion docs/component/data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ After conversion, users can find their Qlib format data in the directory `~/.qli
- `volume`
The trading volume
- `factor`
The Restoration factor
The Restoration factor; ``factor = adjusted-close / close``, `adjusted price` reference: `split adjusted <https://www.investopedia.com/terms/s/splitadjusted.asp>`_

In the convention of `Qlib` data processing, `open, close, high, low, volume, money and factor` will be set to NaN if the stock is suspended.

Expand Down
26 changes: 25 additions & 1 deletion scripts/data_collector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
import bisect
import pickle
import random
import requests
import functools
from pathlib import Path
Expand All @@ -17,6 +18,7 @@
HS_SYMBOLS_URL = "http://app.finance.ifeng.com/hq/list.php?type=stock_a&class={s_type}"

CALENDAR_URL_BASE = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid={market}.{bench_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0&beg=19900101&end=20991231"
SZSE_CALENDAR_URL = "http://www.szse.cn/api/report/exchange/onepersistenthour/monthList?month={month}&random={random}"

CALENDAR_BENCH_URL_MAP = {
"CSI300": CALENDAR_URL_BASE.format(market=1, bench_code="000300"),
Expand Down Expand Up @@ -63,7 +65,29 @@ def _get_calendar(url):
df = Ticker(CALENDAR_BENCH_URL_MAP[bench_code]).history(interval="1d", period="max")
calendar = df.index.get_level_values(level="date").map(pd.Timestamp).unique().tolist()
else:
calendar = _get_calendar(CALENDAR_BENCH_URL_MAP[bench_code])
if bench_code.upper() == "ALL":

@deco_retry
def _get_calendar(month):
    """Fetch the calendar entries for one month from the SZSE endpoint.

    Parameters
    ----------
    month : str
        Month to query; it is substituted verbatim into ``SZSE_CALENDAR_URL``
        (the caller formats it as ``"%Y-%m"``).

    Returns
    -------
    list of pd.Timestamp
        One timestamp (parsed from ``jyrq``) for every record in
        ``resp["data"]`` whose ``jybz`` field is a non-zero integer.
        NOTE(review): presumably ``jybz`` is the trading-day flag and
        ``jyrq`` the date -- confirm against the SZSE API.

    Raises
    ------
    ValueError
        If the request, the JSON decoding, or the record parsing fails.
        The message carries the month and the original error, and the
        original exception is chained as ``__cause__``.
    """
    try:
        # random.random must be *called*; formatting the bare function object
        # would put its repr ("<built-in method random ...>") into the
        # ``random`` query parameter instead of a random number.
        resp = requests.get(SZSE_CALENDAR_URL.format(month=month, random=random.random())).json()
        return [pd.Timestamp(_r["jyrq"]) for _r in resp["data"] if int(_r["jybz"])]
    except Exception as e:
        # Keep the ValueError type and message; chain the cause so the
        # underlying failure is not lost in the traceback.
        raise ValueError(f"{month}-->{e}") from e

month_range = pd.date_range(start="2000-01", end=pd.Timestamp.now() + pd.Timedelta(days=31), freq="M")
calendar = []
for _m in month_range:
cal = _get_calendar(_m.strftime("%Y-%m"))
if cal:
calendar += cal
calendar = list(filter(lambda x: x <= pd.Timestamp.now(), calendar))
else:
calendar = _get_calendar(CALENDAR_BENCH_URL_MAP[bench_code])
_CALENDAR_MAP[bench_code] = calendar
logger.info(f"end of get calendar list: {bench_code}.")
return calendar
Expand Down
72 changes: 65 additions & 7 deletions scripts/data_collector/yahoo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,87 @@ pip install -r requirements.txt

## Collector Data

### Download data and Normalize data

### CN Data

#### 1d

```bash
python collector.py collector_data --source_dir ~/.qlib/stock_data/source --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d --normalize_dir ~/.qlib/stock_data/normalize

# download from yahoo finance
python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1d --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d

# normalize
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1d --normalize_dir ~/.qlib/stock_data/source/cn_1d_nor --region CN --interval 1d

# dump data
cd qlib/scripts
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1d_nor --qlib_dir ~/.qlib/stock_data/source/qlib_cn_1d --freq day --exclude_fields date,adjclose,dividends,splits,symbol

# using
import qlib
from qlib.data import D

qlib.init(provider_uri="~/.qlib/stock_data/source/qlib_cn_1d", region="CN")
df = D.features(D.instruments("all"), ["$close"], freq="day")

```

### Download Data
#### 1min

```bash
python collector.py download_data --source_dir ~/.qlib/stock_data/source --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d

# download from yahoo finance
python collector.py download_data --source_dir ~/.qlib/stock_data/source/cn_1min --region CN --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1min

# normalize
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/cn_1min --normalize_dir ~/.qlib/stock_data/source/cn_1min_nor --region CN --interval 1min

# dump data
cd qlib/scripts
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/cn_1min_nor --qlib_dir ~/.qlib/stock_data/source/qlib_cn_1min --freq 1min --exclude_fields date,adjclose,dividends,splits,symbol

# using
import qlib
from qlib.data import D

qlib.init(provider_uri="~/.qlib/stock_data/source/qlib_cn_1min", region="CN")
df = D.features(D.instruments("all"), ["$close"], freq="1min")

```

### Normalize Data
### US Data

#### 1d

```bash
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source --normalize_dir ~/.qlib/stock_data/normalize --region CN

# download from yahoo finance
python collector.py download_data --source_dir ~/.qlib/stock_data/source/us_1d --region US --start 2020-11-01 --end 2020-11-10 --delay 0.1 --interval 1d

# normalize
python collector.py normalize_data --source_dir ~/.qlib/stock_data/source/us_1d --normalize_dir ~/.qlib/stock_data/source/us_1d_nor --region US --interval 1d

# dump data
cd qlib/scripts
python dump_bin.py dump_all --csv_path ~/.qlib/stock_data/source/us_1d_nor --qlib_dir ~/.qlib/stock_data/source/qlib_us_1d --freq day --exclude_fields date,adjclose,dividends,splits,symbol

# using
import qlib
from qlib.data import D

qlib.init(provider_uri="~/.qlib/stock_data/source/qlib_us_1d", region="US")
df = D.features(D.instruments("all"), ["$close"], freq="day")

```


### Help
```bash
python collector.py collector_data --help
```

## Parameters

- interval: 1m or 1d
- interval: 1min or 1d
- region: CN or US
Loading

0 comments on commit 14be0c1

Please sign in to comment.