From b73a96d3d1f8ea7ef3a2b55fb91e4adaf575ccb6 Mon Sep 17 00:00:00 2001 From: aszerW Date: Sun, 12 Oct 2025 23:26:47 +0800 Subject: [PATCH] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E9=A2=84=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 187 ++++++++++++++++++++++++++++++++++++++ oddsjam_processed_data.py | 33 +++++++ 2 files changed, 220 insertions(+) create mode 100644 .gitignore create mode 100644 oddsjam_processed_data.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b887598 --- /dev/null +++ b/.gitignore @@ -0,0 +1,187 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be added to the global gitignore or merged into this project gitignore. For a PyCharm
+# project, uncomment the following line:
+#.idea/
+
+# VS Code
+.vscode/
+
+# Docker
+.dockerignore
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Logs
+*.log
+logs/
+
+# Database
+*.db
+*.sqlite3
+
+# Backup files
+*.bak
+*.backup
+backup_*.sql
+
+# Temporary files
+*.tmp
+*.temp
+data/
\ No newline at end of file
diff --git a/oddsjam_processed_data.py b/oddsjam_processed_data.py
new file mode 100644
index 0000000..b684962
--- /dev/null
+++ b/oddsjam_processed_data.py
@@ -0,0 +1,78 @@
+"""Restore a packed single-column Feather export into a multi-column table."""
+
+import argparse
+from pathlib import Path
+
+import pandas as pd
+
+
+def restore_table_from_feather(feather_path, ddl_columns, sep='\t'):
+    """Split the packed first column of a Feather file into named columns.
+
+    Args:
+        feather_path: Path of the Feather file to read.
+        ddl_columns: Column names assigned, in order, to the split fields.
+        sep: Delimiter between the fields of one packed value; passed
+            unchanged to ``Series.str.split``. Defaults to a tab.
+
+    Returns:
+        A DataFrame of the split fields labelled with ``ddl_columns``. A
+        file without rows yields an empty DataFrame with those columns.
+
+    Raises:
+        ValueError: If the widest row does not split into exactly
+            ``len(ddl_columns)`` fields.
+    """
+    columns = list(ddl_columns)
+    # Assumes the file holds a single column; only the first one is read.
+    packed = pd.read_feather(feather_path).iloc[:, 0]
+    if packed.empty:
+        # With zero rows there is nothing to split, so build the frame directly.
+        return pd.DataFrame(columns=columns)
+
+    split_df = packed.str.split(sep, expand=True)
+    if split_df.shape[1] != len(columns):
+        raise ValueError(
+            f'expected {len(columns)} fields per row, '
+            f'but the widest row has {split_df.shape[1]}'
+        )
+    split_df.columns = columns
+    return split_df
+
+
+# Column names from the table DDL, in field order.
+ddl_columns = [
+    'id', 'sportsbook', 'sport', 'league', 'fixture_id', 'game_id', 'market', 'grouping_key', 'timestamp',
+    'first_name', 'second_name', 'first_selection', 'second_selection', 'first_selection_line', 'second_selection_line',
+    'first_selection_points', 'second_selection_points', 'first_points', 'second_points', 'first_deep_link',
+    'second_deep_link', 'first_price', 'second_price', 'first_novig_price', 'second_novig_price',
+    'first_power_novig_price', 'second_power_novig_price', 'market_width', 'sportsbook_count', 'time_diff',
+    'pinnacle_novig_begin_first', 'pinnacle_novig_begin_second', 'pinnacle_novig_realtime_first',
+    'pinnacle_novig_realtime_second', 'pinnacle_power_novig_begin_first', 'pinnacle_power_novig_begin_second',
+    'pinnacle_power_novig_realtime_first', 'pinnacle_power_novig_realtime_second', 'max_price_realtime_first',
+    'max_price_realtime_second', 'gmt_created', 'gmt_modified', 'bet_id', 'max_price_sportsbook_realtime_first',
+    'max_price_sportsbook_realtime_second', 'bet_status', 'res'
+]
+
+# Defaults are the paths this script was originally hard-coded to use.
+DEFAULT_FEATHER_PATH = '/Users/aszer/Documents/vscode/bet/data/api_signal_res/api_res_merged.feather'
+DEFAULT_CSV_PATH = 'data/api_signal_res/api_res_merged_processed.csv'
+
+
+def main(argv=None):
+    """Convert the packed Feather file to CSV and print the first rows."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('feather_path', nargs='?', default=DEFAULT_FEATHER_PATH)
+    parser.add_argument('csv_path', nargs='?', default=DEFAULT_CSV_PATH)
+    args = parser.parse_args(argv)
+
+    restored_df = restore_table_from_feather(args.feather_path, ddl_columns)
+    csv_path = Path(args.csv_path)
+    # to_csv does not create missing parent directories.
+    csv_path.parent.mkdir(parents=True, exist_ok=True)
+    restored_df.to_csv(csv_path, index=False, encoding='utf-8-sig')
+    print(restored_df.head())
+
+
+if __name__ == '__main__':
+    main()