Spaces:
Sleeping
Sleeping
zhou12189108
committed on
Commit
•
1dfa6cd
1
Parent(s):
c1ba86c
Upload 3 files
Browse files- Dockerfile +7 -8
- api.py +1 -16
- hcaptcha_solver.py +103 -54
Dockerfile
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
FROM python:3.
|
2 |
|
3 |
# Build dummy packages to skip installing them and their dependencies
|
4 |
RUN apt-get update \
|
@@ -12,14 +12,13 @@ RUN apt-get update \
|
|
12 |
&& equivs-build adwaita-icon-theme \
|
13 |
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
|
14 |
|
15 |
-
FROM python:3.
|
16 |
COPY --from=builder /*.deb /
|
17 |
WORKDIR /app
|
18 |
-
|
19 |
RUN apt update
|
20 |
-
RUN apt upgrade -y
|
21 |
RUN apt install -y python3 python3-pip libgl1-mesa-glx wget libglib2.0-dev sudo libpci-dev psmisc
|
22 |
-
RUN pip install playwright hcaptcha_challenger requests loguru flask Flask-Limiter
|
23 |
RUN dpkg -i /libgl1-mesa-dri.deb \
|
24 |
&& dpkg -i /adwaita-icon-theme.deb \
|
25 |
# Install dependencies
|
@@ -34,14 +33,14 @@ RUN dpkg -i /libgl1-mesa-dri.deb \
|
|
34 |
&& chown -R foxer:foxer .
|
35 |
|
36 |
RUN rm -rf /root/.cache
|
37 |
-
RUN chmod 777 -R "/usr/local/lib/python3.
|
38 |
RUN chmod 777 -R "/app/"
|
39 |
RUN playwright install firefox --with-deps
|
40 |
|
41 |
USER foxer
|
42 |
RUN playwright install firefox
|
43 |
-
COPY
|
44 |
-
|
45 |
COPY api.py .
|
46 |
EXPOSE 8081
|
47 |
|
|
|
1 |
+
FROM python:3.11-slim-bullseye as builder
|
2 |
|
3 |
# Build dummy packages to skip installing them and their dependencies
|
4 |
RUN apt-get update \
|
|
|
12 |
&& equivs-build adwaita-icon-theme \
|
13 |
&& mv adwaita-icon-theme_*.deb /adwaita-icon-theme.deb
|
14 |
|
15 |
+
FROM python:3.11-slim-bullseye
|
16 |
COPY --from=builder /*.deb /
|
17 |
WORKDIR /app
|
18 |
+
RUN echo "deb http://deb.debian.org/debian/ unstable main contrib non-free" >> /etc/apt/sources.list
|
19 |
RUN apt update
|
|
|
20 |
RUN apt install -y python3 python3-pip libgl1-mesa-glx wget libglib2.0-dev sudo libpci-dev psmisc
|
21 |
+
RUN pip install -U playwright hcaptcha_challenger requests loguru flask Flask-Limiter
|
22 |
RUN dpkg -i /libgl1-mesa-dri.deb \
|
23 |
&& dpkg -i /adwaita-icon-theme.deb \
|
24 |
# Install dependencies
|
|
|
33 |
&& chown -R foxer:foxer .
|
34 |
|
35 |
RUN rm -rf /root/.cache
|
36 |
+
RUN chmod 777 -R "/usr/local/lib/python3.11/"
|
37 |
RUN chmod 777 -R "/app/"
|
38 |
RUN playwright install firefox --with-deps
|
39 |
|
40 |
USER foxer
|
41 |
RUN playwright install firefox
|
42 |
+
COPY back.py .
|
43 |
+
|
44 |
COPY api.py .
|
45 |
EXPOSE 8081
|
46 |
|
api.py
CHANGED
@@ -1,8 +1,5 @@
|
|
1 |
-
import hashlib
|
2 |
import os
|
3 |
import asyncio
|
4 |
-
import uuid
|
5 |
-
import shutil
|
6 |
from flask import Flask, jsonify, request, logging as flog
|
7 |
from flask_limiter.util import get_remote_address
|
8 |
import hcaptcha_solver
|
@@ -21,12 +18,6 @@ def get_ipaddr():
|
|
21 |
handler = flog.default_handler
|
22 |
|
23 |
|
24 |
-
def generate_uuid():
|
25 |
-
unique_identifier = str(uuid.uuid4())
|
26 |
-
hashed_string = hashlib.sha256(unique_identifier.encode()).hexdigest()
|
27 |
-
return hashed_string
|
28 |
-
|
29 |
-
|
30 |
def get_token():
|
31 |
default_token = "init_token"
|
32 |
if os.path.exists("token"):
|
@@ -80,13 +71,7 @@ def solver_captcha():
|
|
80 |
data = request.get_json(force=True, silent=True)
|
81 |
if not check_request(require_data, data):
|
82 |
return jsonify(msg="Unauthorized Request"), 403
|
83 |
-
|
84 |
-
hcaptcha_solver.solver.install(upgrade=True)
|
85 |
-
resp=asyncio.run(hcaptcha_solver.bytedance(data["host"], data["site_key"], dir_path))
|
86 |
-
if os.path.exists(dir_path):
|
87 |
-
shutil.rmtree(dir_path)
|
88 |
-
if os.path.exists("tmp_dir"):
|
89 |
-
shutil.rmtree("tmp_dir")
|
90 |
return resp
|
91 |
|
92 |
|
|
|
|
|
1 |
import os
|
2 |
import asyncio
|
|
|
|
|
3 |
from flask import Flask, jsonify, request, logging as flog
|
4 |
from flask_limiter.util import get_remote_address
|
5 |
import hcaptcha_solver
|
|
|
18 |
handler = flog.default_handler
|
19 |
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def get_token():
|
22 |
default_token = "init_token"
|
23 |
if os.path.exists("token"):
|
|
|
71 |
data = request.get_json(force=True, silent=True)
|
72 |
if not check_request(require_data, data):
|
73 |
return jsonify(msg="Unauthorized Request"), 403
|
74 |
+
resp = asyncio.run(hcaptcha_solver.bytedance(data["host"], data["site_key"]))
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
return resp
|
76 |
|
77 |
|
hcaptcha_solver.py
CHANGED
@@ -1,42 +1,9 @@
|
|
1 |
-
from
|
2 |
-
|
3 |
-
import traceback
|
4 |
from loguru import logger
|
5 |
-
from playwright.async_api import BrowserContext as ASyncContext, async_playwright
|
6 |
-
|
7 |
from hcaptcha_challenger.agents import AgentT, Malenia
|
8 |
|
9 |
-
# Init local-side of the ModelHub
|
10 |
-
solver.install(upgrade=True, clip=True)
|
11 |
-
|
12 |
-
# Save dataset to current working directory
|
13 |
-
tmp_dir = Path(__file__).parent.joinpath("tmp_dir")
|
14 |
-
|
15 |
-
|
16 |
-
@logger.catch
|
17 |
-
async def hit_challenge(context: ASyncContext, host, sitekey, user_data_dir, times: int = 8):
|
18 |
-
await context.route('**/*', lambda route, request: route_continuation(route, request, host, sitekey))
|
19 |
-
page = context.pages[0]
|
20 |
-
agent = AgentT.from_page(page=page, tmp_dir=tmp_dir, self_supervised=True)
|
21 |
-
await page.goto(f"https://{host}")
|
22 |
-
|
23 |
-
await agent.handle_checkbox()
|
24 |
-
|
25 |
-
for pth in range(1, times):
|
26 |
-
result = await agent()
|
27 |
-
print(f">> {pth} - Challenge Result: {result}")
|
28 |
-
match result:
|
29 |
-
case agent.status.CHALLENGE_BACKCALL:
|
30 |
-
await page.wait_for_timeout(500)
|
31 |
-
fl = page.frame_locator(agent.HOOK_CHALLENGE)
|
32 |
-
await fl.locator("//div[@class='refresh button']").click()
|
33 |
-
case agent.status.CHALLENGE_SUCCESS:
|
34 |
-
rqdata = agent.cr.__dict__
|
35 |
-
await context.close()
|
36 |
-
return rqdata["generated_pass_UUID"]
|
37 |
-
case default:
|
38 |
-
print(result)
|
39 |
-
|
40 |
|
41 |
async def route_continuation(route, request, host, sitekey):
|
42 |
# 检查请求的URL,只拦截特定网站的请求
|
@@ -105,21 +72,103 @@ async def route_continuation(route, request, host, sitekey):
|
|
105 |
await route.continue_()
|
106 |
|
107 |
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
|
|
|
|
2 |
from loguru import logger
|
3 |
+
from playwright.async_api import BrowserContext as ASyncContext, async_playwright, Page
|
4 |
+
from hcaptcha_challenger import ModelHub, install
|
5 |
from hcaptcha_challenger.agents import AgentT, Malenia
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
async def route_continuation(route, request, host, sitekey):
|
9 |
# 检查请求的URL,只拦截特定网站的请求
|
|
|
72 |
await route.continue_()
|
73 |
|
74 |
|
75 |
+
def patch_modelhub(modelhub: ModelHub):
    """
    Inject extra CLIP candidate labels into ``modelhub.clip_candidates``.

    Patching ``clip_candidates`` is what lets image-classification tasks be
    handled in self-supervised mode. Guidelines for maintaining the table:

    * Provide hints for every category that can appear in one batch of images.
    * The ObjectsYaml in the GitHub repository is refreshed regularly; when a
      new prompt shows up before it lands there, add it here in the same shape.
    * Treat the table as short-lived: once a label stops appearing, remove it.
    * Keep the candidate list moderate in size - every additional prompt adds
      computational cost.

    :param modelhub: hub whose ``clip_candidates`` mapping is updated in place
    :return: None
    """
    # Prompt text as it appears in the challenge; must match exactly.
    largest_animal_prompt = "the largest animal in real life"
    animal_labels = [
        "parrot",
        "bee",
        "ladybug",
        "frog",
        "crab",
        "bat",
        "butterfly",
        "dragonfly",
        "giraffe",
        "tiger",
        "owl",
        "duck",
    ]

    modelhub.clip_candidates.update({largest_animal_prompt: animal_labels})
|
111 |
+
|
112 |
+
|
113 |
+
def prelude(page: Page) -> AgentT:
    """
    Build a CLIP-enabled challenge agent bound to ``page``.

    Operational notes carried over from the upstream example:

    1. ``install(upgrade=True)`` should additionally be run from a background
       task roughly every 20 minutes.
    2. ``install(upgrade=True, clip=True)`` must run before each agent is
       instantiated, which is why it is the first call here.

    :param page: the Playwright page the agent will drive
    :return: the agent created by ``AgentT.from_page``
    """
    install(upgrade=True, clip=True)

    # Load the hub, parse its objects, then apply local overrides before the
    # agent sees it - the pre-modification is what feeds the CLIP prompts.
    hub = ModelHub.from_github_repo()
    hub.parse_objects()
    patch_modelhub(hub)

    # page: control handle; modelhub: externally registered, patched hub;
    # clip: enable the CLIP zero-shot image classification method.
    return AgentT.from_page(page=page, modelhub=hub, clip=True)
|
134 |
+
|
135 |
+
|
136 |
+
async def hit_challenge(context: ASyncContext, host, sitekey, times: int = 8):
|
137 |
+
await context.route('**/*', lambda route, request: route_continuation(route, request, host, sitekey))
|
138 |
+
page = await context.new_page()
|
139 |
+
|
140 |
+
agent = prelude(page)
|
141 |
+
await page.goto(f"https://{host}")
|
142 |
+
logger.info("startup sitelink", url=f"https://{host}")
|
143 |
+
|
144 |
+
await agent.handle_checkbox()
|
145 |
+
|
146 |
+
for pth in range(1, times):
|
147 |
+
# Handle challenge
|
148 |
+
result = await agent.execute()
|
149 |
+
if not agent.qr:
|
150 |
+
return
|
151 |
+
|
152 |
+
# Post-processing
|
153 |
+
match result:
|
154 |
+
case agent.status.CHALLENGE_BACKCALL | agent.status.CHALLENGE_RETRY:
|
155 |
+
logger.warning(f"retry", pth=pth, ash=agent.ash)
|
156 |
+
await page.wait_for_timeout(500)
|
157 |
+
fl = page.frame_locator(agent.HOOK_CHALLENGE)
|
158 |
+
await fl.locator("//div[@class='refresh button']").click()
|
159 |
+
case agent.status.CHALLENGE_SUCCESS:
|
160 |
+
logger.success(f"task done", pth=pth, ash=agent.ash)
|
161 |
+
rqdata = agent.cr.__dict__
|
162 |
+
await context.close()
|
163 |
+
return rqdata["generated_pass_UUID"]
|
164 |
+
|
165 |
+
|
166 |
+
async def bytedance(host, sitekey):
    """
    Launch Firefox, apply stealth patches and run the hCaptcha solver.

    :param host: host name handed to ``hit_challenge`` (without scheme)
    :param sitekey: hCaptcha site key handed to ``hit_challenge``
    :return: whatever ``hit_challenge`` returns - the pass token on success,
        ``None`` otherwise
    """
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=False)
        context = await browser.new_context(locale="en-US")
        await Malenia.apply_stealth(context)

        # Propagate the solver result; previously it was awaited and dropped,
        # so callers using `asyncio.run(bytedance(...))` always received None.
        return await hit_challenge(context, host, sitekey)
|