feat(crawl): 新增编辑规则和测试规则

合并mstaer分支的pull request #71
This commit is contained in:
xiaoyang
2021-12-24 17:38:23 +08:00
parent fecf03b3f5
commit 96662fcb17
11 changed files with 826 additions and 7 deletions

View File

@@ -1,5 +1,8 @@
package com.java2nb.novel.controller;
import com.java2nb.novel.core.cache.CacheKey;
import com.java2nb.novel.core.cache.CacheService;
import com.java2nb.novel.core.utils.HttpUtil;
import io.github.xxyopen.model.page.PageBean;
import com.java2nb.novel.entity.CrawlSingleTask;
@@ -9,6 +12,11 @@ import io.github.xxyopen.model.resp.RestResult;
import lombok.RequiredArgsConstructor;
import org.springframework.web.bind.annotation.*;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Administrator
*/
@@ -19,7 +27,7 @@ public class CrawlController {
private final CrawlService crawlService;
private final CacheService cacheService;
/**
* 新增爬虫源
* */
@@ -39,7 +47,70 @@ public class CrawlController {
return RestResult.ok(crawlService.listCrawlByPage(page,pageSize));
}
/**
* 获取爬虫源
* */
@GetMapping("getCrawlSource/{id}")
public RestResult<CrawlSource> getCrawlSource(@PathVariable("id") Integer id){
CrawlSource crawlSource= crawlService.getCrawlSource(id);
return RestResult.ok(crawlSource);
}
/**
* 测试规则
* @param rule
* @param url
* @param isRefresh
* @return
*/
@PostMapping("testParse")
public RestResult<Object> testParse(String rule,String url,String isRefresh){
Map<String,Object> resultMap=new HashMap<>();
String html =null;
if(url.startsWith("https://")||url.startsWith("http://")){
String refreshCache="1";
if(!refreshCache.equals(isRefresh)) {
Object cache = cacheService.getObject(CacheKey.BOOK_TEST_PARSE + url);
if (cache == null) {
isRefresh="1";
}else {
html = (String) cache;
}
}
if(refreshCache.equals(isRefresh)){
html = HttpUtil.getByHttpClientWithChrome(url);
if (html != null) {
cacheService.setObject(CacheKey.BOOK_TEST_PARSE + url, html, 60 * 10);
}else{
resultMap.put("msg","html is null");
return RestResult.ok(resultMap);
}
}
}else{
resultMap.put("html","url is null");
return RestResult.ok(resultMap);
}
Pattern pattern = Pattern.compile(rule);
Matcher matcher = pattern.matcher(html);
boolean isFind = matcher.find();
resultMap.put("是否匹配",isFind);
if(isFind){
resultMap.put("匹配结果",matcher.group(1));
}
// resultMap.put("url",url);
return RestResult.ok(resultMap);
}
/**
* 修改爬虫源
* */
@PostMapping("updateCrawlSource")
public RestResult<Void> updateCrawlSource(CrawlSource source) {
crawlService.updateCrawlSource(source);
return RestResult.ok();
}
/**
* 开启或停止爬虫
* */