plugin/quark4k/json结构分析.md
- 接口地址: `https://quark4k.com/api/discussions?include=user%2ClastPostedUser%2CmostRelevantPost%2CmostRelevantPost.user%2Ctags%2Ctags.parent%2CfirstPost&filter[q]={关键词}&sort&page[offset]=0`
- 请求方法: `GET`
- 数据格式: `application/json`
- Referer: `https://quark4k.com/`
- 网盘域名: `pan.quark.cn`,需要从HTML内容中解析网盘链接和密码

{
"links": {
"first": "https://quark4k.com/api/discussions?include=..."
},
"data": [
// 讨论帖子数组
],
"included": [
// 相关回复内容、用户、标签数组
]
}
data数组中的讨论帖子结构:
{
"type": "discussions",
"id": "1006",
"attributes": {
"title": "【印度剧】黑手遮天 第2季 (2025) 4K HDR 内封简中 夸克网盘资源下载",
"slug": "1006-yin-du-ju-hei-shou-zhe-tian-di-2ji-2025-4k-hdr-nei-feng-jian-zhong-kua-ke-wang-pan-zi-yuan-xia-zai",
"commentCount": 1,
"participantCount": 1,
"createdAt": "2025-06-13T12:55:57+00:00",
"lastPostedAt": "2025-06-13T12:55:57+00:00",
"lastPostNumber": 1,
"canReply": false,
"isApproved": true,
"isLocked": false
},
"relationships": {
"user": {
"data": {
"type": "users",
"id": "2"
}
},
"mostRelevantPost": {
"data": {
"type": "posts",
"id": "1124"
}
},
"tags": {
"data": [
{
"type": "tags",
"id": "1"
}
]
},
"firstPost": {
"data": {
"type": "posts",
"id": "1124"
}
}
}
}
included数组中的回复内容结构:
{
"type": "posts",
"id": "1124",
"attributes": {
"number": 1,
"createdAt": "2025-06-13T12:55:57+00:00",
"contentType": "comment",
"contentHtml": "<p><br>\n剧名:黑手遮天 第2季 Rana Naidu Season 2<br>\n类型: 剧情<br>\n制片国家/地区: 印度<br>\n语言: 印地语<br>\n首播: 2025-06-13(印度网络)<br>\nIMDb: tt27547185</p>\n\n<p>黑手遮天 第2季的剧情............</p>\n\n<p>《黑手遮天 第2季》夸克网盘链接:<a href=\"https://pan.quark.cn/s/5881dd6b25e4\" rel=\"ugc noopener nofollow\" target=\"_blank\">https://pan.quark.cn/s/5881dd6b25e4</a></p>",
"renderFailed": false,
"editedAt": "2025-09-18T07:31:04+00:00",
"isApproved": true,
"likesCount": 0
},
"relationships": {
"user": {
"data": {
"type": "users",
"id": "2"
}
}
}
}
| 源字段 | 目标字段 | 说明 |
|---|---|---|
| data[].id | UniqueID | 格式: quark4k-{discussion_id} |
| data[].attributes.title | Title | 讨论标题 |
| data[].attributes.createdAt | Datetime | 创建时间 |
| included[].attributes.contentHtml | Content | HTML内容,需要解析提取网盘链接 |
| "" | Channel | 插件搜索结果Channel为空 |
| [] | Tags | 标签数组(从标题或内容中提取) |
| 解析的网盘链接 | Links | 从HTML内容中提取的网盘链接 |
链接以 `<a href="...">` 标签形式存在,但更多是纯文本格式:

- `<a href="https://pan.quark.cn/s/5881dd6b25e4" rel="ugc noopener nofollow" target="_blank">https://pan.quark.cn/s/5881dd6b25e4</a>`
- `https://pan.quark.cn/s/5881dd6b25e4`

| 网盘类型 | 域名特征 | 示例链接 | 密码关键词 |
|---|---|---|---|
| 夸克网盘 | pan.quark.cn | https://pan.quark.cn/s/5881dd6b25e4 | 提取码、密码 |
重要说明: quark4k插件主要支持夸克网盘,绝大多数链接都是pan.quark.cn域名,但帖子中也可能包含其他网盘类型的链接。
pan.quark.cn)
searchURL := fmt.Sprintf("https://quark4k.com/api/discussions?include=user%%2ClastPostedUser%%2CmostRelevantPost%%2CmostRelevantPost.user%%2Ctags%%2Ctags.parent%%2CfirstPost&filter[q]=%s&sort&page[offset]=%d&page[limit]=%d", url.QueryEscape(keyword), offset, PageSize)
// Request headers sent with the discussions API call.
req.Header.Set("User-Agent", getRandomUA()) // random User-Agent per request to reduce anti-crawler blocking
req.Header.Set("X-Forwarded-For", generateRandomIP()) // randomly generated IP address
req.Header.Set("Accept", "application/json, text/plain, */*")
req.Header.Set("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
req.Header.Set("Connection", "keep-alive")
req.Header.Set("Sec-Fetch-Dest", "empty")
req.Header.Set("Sec-Fetch-Mode", "cors")
req.Header.Set("Sec-Fetch-Site", "same-origin")
req.Header.Set("Referer", "https://quark4k.com/")
// Map one discussion and its associated post onto a model.SearchResult.
result := model.SearchResult{
UniqueID: fmt.Sprintf("quark4k-%s", discussion.ID),
Title: discussion.Attributes.Title,
Content: extractTextFromHTML(post.Attributes.ContentHTML),
Links: extractLinksFromHTML(post.Attributes.ContentHTML),
Tags: extractTagsFromTitle(discussion.Attributes.Title),
Channel: "", // plugin search results leave Channel empty
Datetime: parseTime(discussion.Attributes.CreatedAt),
}
// 清理HTML内容(参考pan666的cleanHTML函数)
func (p *Quark4KAsyncPlugin) cleanHTML(html string) string {
// 移除
标签
html = strings.ReplaceAll(html, "
", "\n")
html = strings.ReplaceAll(html, "
", "\n")
html = strings.ReplaceAll(html, "
", "\n")
// 移除其他HTML标签
var result strings.Builder
inTag := false
for _, r := range html {
if r == '<' {
inTag = true
continue
}
if r == '>' {
inTag = false
continue
}
if !inTag {
result.WriteRune(r)
}
}
// 处理HTML实体
output := result.String()
output = strings.ReplaceAll(output, "&", "&")
output = strings.ReplaceAll(output, "<", "<")
output = strings.ReplaceAll(output, ">", ">")
output = strings.ReplaceAll(output, """, "\"")
output = strings.ReplaceAll(output, "'", "'")
output = strings.ReplaceAll(output, "'", "'")
output = strings.ReplaceAll(output, " ", " ")
// 处理多行空白
lines := strings.Split(output, "\n")
var cleanedLines []string
for _, line := range lines {
trimmed := strings.TrimSpace(line)
if trimmed != "" {
cleanedLines = append(cleanedLines, trimmed)
}
}
return strings.Join(cleanedLines, "\n")
}
// extractLinksFromText collects Quark netdisk links from cleaned post text and
// pairs each one with the nearest extraction code (modelled on pan666's
// extractLinksFromText function).
//
// The text is scanned line by line. A line containing "pan.quark.cn"
// contributes the URL returned by extractURLFromText. A line containing
// "提取码" or "密码" contributes the text that follows the first colon (ASCII
// ':' or full-width '：') after that keyword, provided it is non-empty and at
// most 10 bytes long. A link that already carries its password as a query
// parameter keeps that password; otherwise it takes the closest password
// found within 3 lines of it. Links are returned in order of appearance, and
// the result is nil when no link is found.
func (p *Quark4KAsyncPlugin) extractLinksFromText(content string) []model.Link {
	type linkInfo struct {
		link     model.Link
		position int // index of the line the link was found on
	}
	type passwordInfo struct {
		position int // index of the line the password was found on
		password string
	}

	// A password further away than this many lines is not attributed to a link.
	const maxLineDistance = 3

	passwordKeywords := []string{"提取码", "密码"}
	isColon := func(r rune) bool { return r == ':' || r == '：' }

	var linkInfos []linkInfo
	var passwordInfos []passwordInfo

	// First pass: collect every link and every candidate password.
	for i, line := range strings.Split(content, "\n") {
		line = strings.TrimSpace(line)

		if strings.Contains(line, "pan.quark.cn") {
			if u := p.extractURLFromText(line); u != "" {
				linkInfos = append(linkInfos, linkInfo{
					link:     model.Link{URL: u, Type: "quark"},
					position: i,
				})
			}
		}

		for _, keyword := range passwordKeywords {
			kw := strings.Index(line, keyword)
			if kw == -1 {
				continue
			}
			// Look for the colon after the keyword, so that an earlier colon
			// on the same line (e.g. "链接: ... 提取码: abcd") is not mistaken
			// for the separator.
			rest := line[kw+len(keyword):]
			colon := strings.IndexFunc(rest, isColon)
			if colon == -1 {
				continue
			}
			// TrimLeft removes the colon whatever its byte width is.
			password := strings.TrimSpace(strings.TrimLeft(rest[colon:], ":："))
			// Anything longer than 10 bytes is unlikely to be a password.
			if password == "" || len(password) > 10 {
				continue
			}
			passwordInfos = append(passwordInfos, passwordInfo{position: i, password: password})
			break // one password per line is enough
		}
	}

	if len(linkInfos) == 0 {
		return nil
	}

	// Second pass: attach a password to each link.
	allLinks := make([]model.Link, 0, len(linkInfos))
	for _, info := range linkInfos {
		if pw := p.extractPasswordFromURL(info.link.URL); pw != "" {
			// The URL itself carries the password.
			info.link.Password = pw
		} else {
			// Closest password wins; on a tie the earlier one is kept.
			best := maxLineDistance + 1
			for _, pwInfo := range passwordInfos {
				if d := abs(pwInfo.position - info.position); d < best {
					best = d
					info.link.Password = pwInfo.password
				}
			}
		}
		allLinks = append(allLinks, info.link)
	}
	return allLinks
}
// extractURLFromText returns the first http:// or https:// URL found in text,
// or "" when text contains neither scheme.
//
// The URL runs from the scheme up to (not including) the first whitespace,
// quote, angle bracket, closing bracket/brace/parenthesis, comma or semicolon,
// or to the end of text if none of those follows.
func (p *Quark4KAsyncPlugin) extractURLFromText(text string) string {
	// Use whichever scheme occurs earliest, so an "http://" later in the line
	// cannot shadow an "https://" URL that comes first.
	start := -1
	for _, prefix := range []string{"http://", "https://"} {
		if pos := strings.Index(text, prefix); pos != -1 && (start == -1 || pos < start) {
			start = pos
		}
	}
	if start == -1 {
		return ""
	}

	rest := text[start:]
	if end := strings.IndexAny(rest, " \t\n\"'<>)]},;"); end != -1 {
		return rest[:end]
	}
	return rest
}
// extractPasswordFromURL returns the share password embedded in url as a
// pwd=, password=, passcode= or code= parameter (checked in that order), or
// "" when no such parameter has a non-empty value.
//
// A parameter name only counts when it directly follows '?', '&' or '#', so
// that "code=" is not matched inside an unrelated name such as "zipcode=".
// The value ends at the next '&' or '#', or at the end of the URL.
func (p *Quark4KAsyncPlugin) extractPasswordFromURL(url string) string {
	for _, param := range []string{"pwd=", "password=", "passcode=", "code="} {
		for from := 0; ; {
			rel := strings.Index(url[from:], param)
			if rel == -1 {
				break
			}
			pos := from + rel
			start := pos + len(param)
			from = start // resume after this occurrence if it is rejected

			if pos == 0 || !strings.ContainsRune("?&#", rune(url[pos-1])) {
				continue // substring of a longer name, not a parameter of its own
			}

			value := url[start:]
			if end := strings.IndexAny(value, "&#"); end != -1 {
				value = value[:end]
			}
			if value != "" {
				return value
			}
		}
	}
	return ""
}
// abs returns the absolute value of n.
func abs(n int) int {
	if n >= 0 {
		return n
	}
	return -n
}
// getRandomUA returns one entry, picked at random, from a fixed list of
// browser User-Agent strings.
func getRandomUA() string {
	candidates := [...]string{
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36",
		"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36",
	}
	pick := rand.Intn(len(candidates))
	return candidates[pick]
}
// generateRandomIP returns a random dotted-quad IPv4 string.
// The first octet is in 1..223, the two middle octets are in 0..254 and the
// last octet is in 1..254.
func generateRandomIP() string {
	first := rand.Intn(223) + 1 // never 0, never above 223
	second := rand.Intn(255)
	third := rand.Intn(255)
	fourth := rand.Intn(254) + 1 // never 0
	return fmt.Sprintf("%d.%d.%d.%d", first, second, third, fourth)
}
// parseTime parses an RFC 3339 timestamp such as "2025-06-13T12:55:57+00:00".
// When timeStr cannot be parsed, the current time is returned instead.
func (p *Quark4KAsyncPlugin) parseTime(timeStr string) time.Time {
	if parsed, err := time.Parse(time.RFC3339, timeStr); err == nil {
		return parsed
	}
	return time.Now()
}
// Quark4KResponse is the top-level JSON document returned by the discussions
// API: a "links" object, a "data" array of discussions and an "included"
// array of related resources.
type Quark4KResponse struct {
Links Quark4KLinks `json:"links"`
Data []Quark4KDiscussion `json:"data"`
Included []Quark4KIncludedItem `json:"included"`
}
// Quark4KLinks maps the response's "links" object.
// NOTE(review): "next" is presumably the URL of the following page and is
// absent on the last page — confirm against a real multi-page response.
type Quark4KLinks struct {
First string `json:"first"`
Next string `json:"next,omitempty"`
}
// Quark4KDiscussion is one element of the response's "data" array.
type Quark4KDiscussion struct {
Type string `json:"type"`
ID string `json:"id"`
Attributes Quark4KDiscussionAttributes `json:"attributes"`
Relationships Quark4KRelationships `json:"relationships"`
}
// Quark4KDiscussionAttributes maps the "attributes" object of a discussion.
// CreatedAt and LastPostedAt are kept as the raw timestamp strings from the
// JSON (e.g. "2025-06-13T12:55:57+00:00").
type Quark4KDiscussionAttributes struct {
Title string `json:"title"`
Slug string `json:"slug"`
CommentCount int `json:"commentCount"`
ParticipantCount int `json:"participantCount"`
CreatedAt string `json:"createdAt"`
LastPostedAt string `json:"lastPostedAt"`
LastPostNumber int `json:"lastPostNumber"`
IsApproved bool `json:"isApproved"`
IsLocked bool `json:"isLocked"`
}
// Quark4KRelationships maps the "relationships" object of a discussion.
// Only mostRelevantPost is decoded; the other relationships present in the
// JSON (user, tags, firstPost) are not mapped.
type Quark4KRelationships struct {
MostRelevantPost Quark4KPostRef `json:"mostRelevantPost"`
}
// Quark4KPostRef wraps the "data" object of a post relationship.
type Quark4KPostRef struct {
Data Quark4KPostData `json:"data"`
}
// Quark4KPostData is a resource identifier (type and id) pointing at a post,
// e.g. {"type": "posts", "id": "1124"}.
type Quark4KPostData struct {
Type string `json:"type"`
ID string `json:"id"`
}
// Quark4KIncludedItem is one element of the response's "included" array,
// which may hold several resource types (posts, users, tags).
type Quark4KIncludedItem struct {
Type string `json:"type"`
ID string `json:"id"`
Attributes json.RawMessage `json:"attributes"` // kept as RawMessage so each type can be decoded as needed
}
// Quark4KPost is a post resource (extracted from the "included" array).
type Quark4KPost struct {
Type string `json:"type"`
ID string `json:"id"`
Attributes Quark4KPostAttributes `json:"attributes"`
}
// Quark4KPostAttributes maps the "attributes" object of a post.
// ContentHTML holds the post body as HTML (JSON key "contentHtml"); CreatedAt
// and EditedAt are kept as the raw timestamp strings from the JSON.
type Quark4KPostAttributes struct {
Number int `json:"number"`
CreatedAt string `json:"createdAt"`
ContentType string `json:"contentType"`
ContentHTML string `json:"contentHtml"`
RenderFailed bool `json:"renderFailed"`
EditedAt string `json:"editedAt,omitempty"`
IsApproved bool `json:"isApproved"`
LikesCount int `json:"likesCount"`
}
- 通过 `relationships.mostRelevantPost.data.id` 关联讨论和回复
- 在 `included` 数组中查找对应的回复内容
- `included` 数组可能包含多种类型(posts, users, tags),需要过滤出posts类型
- 处理HTML实体(`&lt;`、`&gt;`等)
- 过滤无效链接(`javascript:`、`#`等)

| 特性 | quark4k | pan666/bixin | 说明 |
|---|---|---|---|
| 数据源 | 论坛讨论API | 论坛讨论API | 使用相同的论坛系统 |
| API结构 | 相同 | 相同 | JSON结构完全一致 |
| 链接解析 | 文本解析 | 文本解析 | 都需要从HTML清理后的文本中提取 |
| 主要网盘 | 夸克网盘 | 移动云盘/多种网盘 | 主要提供不同网盘链接 |
| 密码匹配 | 位置关联 | 位置关联 | 使用相同的密码匹配策略 |
| 过滤策略 | 跳过Service层过滤 | 跳过Service层过滤 | 都使用NewBaseAsyncPluginWithFilter |

| 特性 | quark4k/pan666/bixin | 其他插件 | 说明 |
|---|---|---|---|
| 数据源 | 论坛讨论API | 网盘搜索API | 需要解析HTML内容 |
| 链接格式 | 纯文本格式 | 直接URL字符串 | 需要从文本中提取 |
| 内容结构 | 讨论+回复 | 直接资源信息 | 需要关联处理 |
| 链接验证 | 必需 | 可选 | 论坛可能包含无效链接 |
| 过滤策略 | 跳过Service层过滤 | 启用Service层过滤 | 论坛内容需要宽泛搜索 |
NewBaseAsyncPluginWithFilter("quark4k", 3, true)
# -g (--globoff) stops curl from interpreting the [ ] in filter[q], page[offset]
# and page[limit] as URL glob ranges, which would abort with a "bad range" error.
curl -g "https://quark4k.com/api/discussions?include=user%2ClastPostedUser%2CmostRelevantPost%2CmostRelevantPost.user%2Ctags%2Ctags.parent%2CfirstPost&filter[q]=遮天&sort&page[offset]=0&page[limit]=50" \
-H "Referer: https://quark4k.com/" \
-H "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
// NewQuark4KAsyncPlugin builds the quark4k plugin; it is adapted from the
// pan666/bixin plugins.
func NewQuark4KAsyncPlugin() *Quark4KAsyncPlugin {
	// Passing true as the last argument skips Service-layer filtering.
	base := plugin.NewBaseAsyncPluginWithFilter("quark4k", 3, true)
	return &Quark4KAsyncPlugin{
		BaseAsyncPlugin: base,
		retries:         MaxRetries,
	}
}
// 主要修改点:
// 1. 更改API URL: "https://quark4k.com/api/discussions"
// 2. 更改插件名称: "quark4k"
// 3. 简化链接提取:主要处理夸克网盘(pan.quark.cn)
// 4. 简化密码匹配:只匹配"提取码"和"密码"关键词
// 5. 保持相同的HTML解析逻辑
// 6. 处理included数组时区分不同类型