Compare commits

...

5 Commits

Author SHA1 Message Date
jyong
80b90267cc upgrade version 2025-02-17 18:19:25 +08:00
jyong
f94eef4011 Merge remote-tracking branch 'origin/feat/upgrade-unstructured-version-10.2' into feat/upgrade-unstructured-version-10.2 2025-02-17 11:46:42 +08:00
jyong
be60e91b90 upgrade Unstructured version 2025-02-17 11:46:23 +08:00
Jyong
a26d9f4899 Update build-push.yml 2025-02-17 11:28:57 +08:00
jyong
d6bd8d2bf6 upgrade Unstructured version 2025-02-17 11:27:55 +08:00
4 changed files with 2494 additions and 2440 deletions

View File

@@ -5,6 +5,7 @@ on:
branches:
- "main"
- "deploy/dev"
- "feat/upgrade-unstructured-version-10.2"
release:
types: [published]

View File

@@ -49,25 +49,25 @@ ENV TZ=UTC
WORKDIR /app/api
RUN apt-get update \
&& apt-get install -y --no-install-recommends curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
# if you are located in China, you can use the aliyun mirror to speed up downloads
# && echo "deb http://mirrors.aliyun.com/debian testing main" > /etc/apt/sources.list \
&& echo "deb http://deb.debian.org/debian testing main" > /etc/apt/sources.list \
&& apt-get update \
# For Security
&& apt-get install -y --no-install-recommends zlib1g=1:1.3.dfsg+really1.3.1-1 expat=2.6.3-1 libldap-2.5-0=2.5.18+dfsg-3+b1 perl=5.40.0-6 libsqlite3-0=3.46.1-1 \
# install a Chinese font to support the use of tools like matplotlib
&& apt-get install -y fonts-noto-cjk \
# Install dependencies
&& apt-get install -y --no-install-recommends \
# basic environment
curl nodejs libgmp-dev libmpfr-dev libmpc-dev \
# For Security
expat libldap-2.5-0 perl libsqlite3-0 zlib1g \
# install a Chinese font to support the use of tools like matplotlib
fonts-noto-cjk \
# install libmagic so that python-magic can guess MIME types
libmagic1 \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
# Copy Python environment and packages
ENV VIRTUAL_ENV=/app/api/.venv
COPY --from=packages ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
# Download nltk data
RUN python -c "import nltk; nltk.download('punkt'); nltk.download('averaged_perceptron_tagger')"
RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger')"
# Copy source code
COPY . /app/api/

4909
api/poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -177,7 +177,7 @@ tencentcloud-sdk-python-hunyuan = "~3.0.1158"
tiktoken = "~0.8.0"
tokenizers = "~0.15.0"
transformers = "~4.35.0"
unstructured = { version = "~0.16.1", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] }
unstructured = { version = "~0.16.13", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] }
validators = "0.21.0"
volcengine-python-sdk = {extras = ["ark"], version = "~1.0.98"}
websocket-client = "~1.7.0"