Mirror of https://github.com/201206030/novel-plus.git, synced 2026-02-08 01:02:29 +08:00
@@ -1,5 +1,8 @@
package com.java2nb.novel.controller;

import com.java2nb.novel.core.cache.CacheKey;
import com.java2nb.novel.core.cache.CacheService;
import com.java2nb.novel.core.utils.HttpUtil;
import io.github.xxyopen.model.page.PageBean;

import com.java2nb.novel.entity.CrawlSingleTask;
@@ -9,6 +12,11 @@ import io.github.xxyopen.model.resp.RestResult;
import lombok.RequiredArgsConstructor;
import org.springframework.web.bind.annotation.*;

import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * @author Administrator
 */
@@ -19,7 +27,7 @@ public class CrawlController {

    private final CrawlService crawlService;

    private final CacheService cacheService;

    /**
     * Add a crawl source.
     */
@@ -39,7 +47,70 @@ public class CrawlController {
        return RestResult.ok(crawlService.listCrawlByPage(page, pageSize));
    }

    /**
     * Get a crawl source by its ID.
     */
    @GetMapping("getCrawlSource/{id}")
    public RestResult<CrawlSource> getCrawlSource(@PathVariable("id") Integer id) {
        CrawlSource crawlSource = crawlService.getCrawlSource(id);
        return RestResult.ok(crawlSource);
    }

    /**
     * Test a parse rule (regular expression) against a live page.
     *
     * @param rule      the regular expression to test; its first capture group is reported on a match
     * @param url       the http(s) URL of the page to fetch and match against
     * @param isRefresh pass "1" to bypass the page cache and re-fetch the URL
     * @return the match outcome
     */
    @PostMapping("testParse")
    public RestResult<Object> testParse(String rule, String url, String isRefresh) {

        Map<String, Object> resultMap = new HashMap<>();
        String html = null;
        if (url != null && (url.startsWith("https://") || url.startsWith("http://"))) {
            String refreshCache = "1";
            if (!refreshCache.equals(isRefresh)) {
                // No forced refresh requested: try the cached copy of the page first
                Object cache = cacheService.getObject(CacheKey.BOOK_TEST_PARSE + url);
                if (cache == null) {
                    // Cache miss: fall through to a fresh fetch
                    isRefresh = "1";
                } else {
                    html = (String) cache;
                }
            }
            if (refreshCache.equals(isRefresh)) {
                html = HttpUtil.getByHttpClientWithChrome(url);
                if (html != null) {
                    // Cache the fetched page for 10 minutes
                    cacheService.setObject(CacheKey.BOOK_TEST_PARSE + url, html, 60 * 10);
                } else {
                    resultMap.put("msg", "html is null");
                    return RestResult.ok(resultMap);
                }
            }
        } else {
            resultMap.put("msg", "url must start with http:// or https://");
            return RestResult.ok(resultMap);
        }
        Pattern pattern = Pattern.compile(rule);
        Matcher matcher = pattern.matcher(html);
        boolean isFind = matcher.find();
        // "是否匹配" = "matched"
        resultMap.put("是否匹配", isFind);
        if (isFind && matcher.groupCount() > 0) {
            // Guard: group(1) throws if the rule has no capture group
            // "匹配结果" = "match result": the first capture group of the rule
            resultMap.put("匹配结果", matcher.group(1));
        }
        // resultMap.put("url", url);
        return RestResult.ok(resultMap);
    }

    /**
     * Update a crawl source.
     */
    @PostMapping("updateCrawlSource")
    public RestResult<Void> updateCrawlSource(CrawlSource source) {
        crawlService.updateCrawlSource(source);
        return RestResult.ok();
    }

    /**
     * Start or stop a crawler.
     */
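
For context, the new testParse endpoint can be exercised with any HTTP client once the service is running. Below is a minimal sketch using the JDK's built-in java.net.http client; the host, port, and the /crawl base path are assumptions (the controller's @RequestMapping is outside the hunks shown), and the sample rule is an ordinary regex whose first capture group is what the endpoint reports back.

import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;

public class TestParseClient {

    public static void main(String[] args) throws Exception {
        // Form-encoded parameters, matching the controller's plain String parameters
        String form = "rule=" + URLEncoder.encode("<title>(.+?)</title>", StandardCharsets.UTF_8)
                + "&url=" + URLEncoder.encode("https://example.com/", StandardCharsets.UTF_8)
                + "&isRefresh=1"; // "1" bypasses the 10-minute page cache

        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:8080/crawl/testParse")) // assumed base path
                .header("Content-Type", "application/x-www-form-urlencoded")
                .POST(HttpRequest.BodyPublishers.ofString(form))
                .build();

        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());

        // The body wraps the result map, e.g. {"是否匹配":true,"匹配结果":"Example Domain"}
        System.out.println(response.body());
    }
}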
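
One detail worth calling out: because the endpoint reports matcher.group(1), a tested rule only yields a match result if it contains at least one capture group, which is what the groupCount() check above guards against. A self-contained illustration:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RuleGroupDemo {
    public static void main(String[] args) {
        String html = "<title>Example Domain</title>";

        // A rule with a capture group: group(1) holds the extracted text.
        Matcher ok = Pattern.compile("<title>(.+?)</title>").matcher(html);
        if (ok.find() && ok.groupCount() > 0) {
            System.out.println(ok.group(1)); // Example Domain
        }

        // A rule that matches but has no capture group: calling group(1) here
        // would throw IndexOutOfBoundsException, hence the groupCount() check.
        Matcher noGroup = Pattern.compile("<title>.+?</title>").matcher(html);
        System.out.println(noGroup.find() + " / groups: " + noGroup.groupCount()); // true / groups: 0
    }
}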