Published 2022. 2. 25. 17:20
터미널 정보 안내 사이트에서 서울특별시부터 제주도까지 각 지역의 터미널 목록을 크롤링하여 엑셀 파일로 내려받는 방법을 알아보겠습니다.
터미널 정보 > 터미널 안내 (bustago.or.kr)
프로젝트 구조
MakeExcel.java
package com.crawlling.common;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import net.sf.jxls.exception.ParsePropertyException;
import net.sf.jxls.transformer.XLSTransformer;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Workbook;
import org.springframework.beans.factory.annotation.Value;
/**
 * Fills a jXLS Excel template with data and streams the resulting workbook to
 * the HTTP response as a file download.
 */
public class MakeExcel {

    /** Extension appended to the download name when the caller did not supply it. */
    private static final String XLSX_EXTENSION = ".xlsx";

    /** MIME type announced for the generated workbook. */
    private static final String XLSX_CONTENT_TYPE =
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

    /**
     * Active Spring profile; only printed for diagnostics.
     * NOTE(review): this is populated only when the instance is managed by
     * Spring. Instances created with {@code new} leave it {@code null}.
     */
    @Value("${spring.profiles.active}")
    private String profile;

    /**
     * Renders {@code templateFile} with {@code bean} and writes the workbook to
     * {@code response} as an attachment.
     *
     * @param request      current request; used to resolve the real path of
     *                     {@code /WEB-INF/excel}, the template directory
     * @param response     response that receives the headers and workbook bytes
     * @param bean         data made available to the template
     * @param fileName     download file name; {@code .xlsx} is appended only
     *                     when the name does not already end with it
     * @param templateFile file name of the template inside the template directory
     * @param string       unused; kept so existing callers keep compiling
     * @throws ParsePropertyException       if the template cannot be processed
     * @throws InvalidFormatException       if the template is not a valid workbook
     * @throws java.io.UncheckedIOException if the template cannot be read or the
     *                                      workbook cannot be written
     */
    public void download(HttpServletRequest request, HttpServletResponse response,
            Map<String, Object> bean, String fileName, String templateFile, String string)
            throws ParsePropertyException, InvalidFormatException {
        String tempPath = request.getSession().getServletContext().getRealPath("/WEB-INF/excel");
        System.out.println("tempPath : " + tempPath);
        System.out.println("profile : " + profile);

        // getRealPath may return null when the resource is not on the file system.
        if (tempPath == null) {
            throw new java.io.UncheckedIOException(new java.io.FileNotFoundException(
                    "Cannot resolve /WEB-INF/excel to a file system path"));
        }

        // File(parent, child) joins with the platform separator, so no
        // profile-specific "\\" versus "/" branching is required.
        java.io.File template = new java.io.File(tempPath, templateFile);

        // Avoid producing "name.xlsx.xlsx" when the caller already added the extension.
        String requestedName = String.valueOf(fileName);
        String downloadName = requestedName.toLowerCase(java.util.Locale.ROOT).endsWith(XLSX_EXTENSION)
                ? requestedName
                : requestedName + XLSX_EXTENSION;

        // try-with-resources closes the template stream even when rendering fails.
        try (InputStream is = new BufferedInputStream(new FileInputStream(template))) {
            XLSTransformer xls = new XLSTransformer();
            Workbook workbook = xls.transformXLS(is, bean);

            // Headers are set only after the template rendered successfully.
            response.setContentType(XLSX_CONTENT_TYPE);
            response.setHeader("Content-Disposition", "attachment; filename=\"" + downloadName + "\"");

            // The response stream is owned by the servlet container, so it is
            // flushed here but not closed.
            OutputStream os = response.getOutputStream();
            workbook.write(os);
            os.flush();
        } catch (IOException e) {
            // Surface the failure instead of returning an empty response.
            throw new java.io.UncheckedIOException(
                    "Failed to generate Excel download from template " + template, e);
        }
    }
}
Url.java
package com.crawlling.common;
/* API URL definitions. */
public final class Url {

    /** Constant holder; not meant to be instantiated. */
    private Url() {
    }

    /** Crawling endpoints. */
    public static final class CRAWLING {
        /**
         * Path of the terminal-site crawling endpoint.
         * The identifier's spelling is kept as-is because other classes reference it.
         */
        public static final String TEMINAL = "/terminal";

        /** Constant holder; not meant to be instantiated. */
        private CRAWLING() {
        }
    }
}
MainController.java
package com.crawlling.controller;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import com.crawlling.common.Url.CRAWLING;
import com.crawlling.common.MakeExcel;
import com.crawlling.service.CrawlingService;
/**
 * Web entry point that crawls the terminal listing for every region and
 * returns the collected rows as an Excel download.
 */
@Controller
public class MainController {

    /**
     * One {@code {regionId, pageCount}} pair per region, in crawl order.
     * The first value is passed as the terminal/region id and the second as the
     * number of listing pages to fetch for that region.
     */
    private static final int[][] REGION_PAGES = {
        {11, 4},  // Seoul
        {26, 4},  // Busan
        {27, 2},  // Daegu
        {28, 4},  // Incheon
        {29, 2},  // Gwangju
        {30, 2},  // Daejeon
        {31, 3},  // Ulsan
        {40, 1},  // Sejong
        {41, 46}, // Gyeonggi-do
        {42, 41}, // Gangwon-do
        {43, 13}, // Chungcheongbuk-do
        {44, 25}, // Chungcheongnam-do
        {45, 14}, // Jeollabuk-do
        {46, 42}, // Jeollanam-do
        {47, 38}, // Gyeongsangbuk-do
        {48, 48}, // Gyeongsangnam-do
        {49, 1},  // Jeju-do
    };

    /**
     * Crawls the terminal site for all regions and writes the result to the
     * response as an Excel file.
     *
     * @param request  current request, forwarded to the Excel writer
     * @param response response that receives the generated workbook
     * @param model    unused; kept so the handler signature stays unchanged
     * @throws Exception if crawling or workbook generation fails
     */
    @GetMapping(CRAWLING.TEMINAL)
    public void crawlingTerminal(HttpServletRequest request, HttpServletResponse response, Model model) throws Exception {
        List<Map<String, Object>> terminalList = new ArrayList<>();
        CrawlingService crawlingService = new CrawlingService();

        // Accumulate the rows of every region into the same list.
        for (int[] regionPages : REGION_PAGES) {
            terminalList = crawlingService.getTerminalDatas(terminalList, regionPages[0], regionPages[1]);
        }

        // Data handed to the Excel template; presumably the template iterates
        // over "dataList" - verify against terminal.xlsx.
        Map<String, Object> beans = new HashMap<String, Object>();
        beans.put("dataList", terminalList);

        // The file name is passed without an extension because
        // MakeExcel.download appends ".xlsx" itself.
        MakeExcel me = new MakeExcel();
        me.download(request, response, beans, "terminal_excelDownload", "terminal.xlsx", null);
    }
}
CrawlingService.java
package com.crawlling.service;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;
/**
 * Fetches terminal listing pages from bustago.or.kr and converts each table
 * row into a map of column values.
 */
@Service
public class CrawlingService {

    /** Number of table cells read from each listing row. */
    private static final int EXPECTED_CELLS = 5;

    /**
     * Fetches pages {@code 1..page} of the listing for {@code teminalId} and
     * appends one map per table row to {@code terminalList}.
     *
     * <p>Each map holds the keys {@code boardNo} (Integer), {@code region},
     * {@code terminalNm}, {@code address} and {@code phoneNo} (Strings), taken
     * from the first five cells of the row. Rows with fewer than five cells are
     * skipped.
     *
     * @param terminalList list to append to; the same instance is returned
     * @param teminalId    value sent as the {@code terminalId} query parameter
     * @param page         number of pages to fetch, starting at page 1
     * @return {@code terminalList} with the crawled rows appended
     * @throws IOException           if a page cannot be fetched
     * @throws NumberFormatException if the first cell of a row is not an integer
     */
    public List<Map<String, Object>> getTerminalDatas(List<Map<String, Object>> terminalList, int teminalId, int page) throws IOException {
        for (int pageNo = 1; pageNo <= page; pageNo++) {
            String url = "https://www.bustago.or.kr/newweb/kr/terminalInfo/ajax/terminallist_ajax.do?terminalAbbrNm=&terminalId="
                    + teminalId + "&terminalSearch=&page=" + pageNo;
            Document doc = Jsoup.connect(url).get();
            Elements rows = doc.select("table tbody tr");

            for (Element row : rows) {
                Elements cells = row.select("td");
                // Rows with fewer than five cells cannot be mapped; skip them
                // instead of failing with IndexOutOfBoundsException.
                if (cells.size() < EXPECTED_CELLS) {
                    continue;
                }

                Map<String, Object> terminal = new HashMap<String, Object>();
                terminal.put("boardNo", Integer.parseInt(cells.get(0).text()));
                terminal.put("region", cells.get(1).text());
                terminal.put("terminalNm", cells.get(2).text());
                terminal.put("address", cells.get(3).text());
                terminal.put("phoneNo", cells.get(4).text());
                terminalList.add(terminal);
            }
        }
        System.out.println("size : " + terminalList.size());
        return terminalList;
    }
}
terminal.xlsx
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the terminal-crawling Spring Boot application. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Inherits plugin and dependency management from the Spring Boot parent POM. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.6.4</version>
<relativePath/> <!-- lookup parent from repository -->
</parent>
<groupId>com</groupId>
<artifactId>crawlling</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>crawlling</name>
<description>Demo project for Spring Boot</description>
<properties>
<java.version>1.8</java.version>
</properties>
<dependencies>
<!-- Spring MVC starter; provides the @Controller / @GetMapping support used by MainController. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- Test-scoped Spring Boot testing starter. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
<!-- HTML fetching and parsing; used by CrawlingService (Jsoup, Document, Elements). -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.3</version>
</dependency>
<!-- https://mvnrepository.com/artifact/net.sf.jxls/jxls-core -->
<!-- Excel template engine; MakeExcel uses its XLSTransformer to fill the template. -->
<dependency>
<groupId>net.sf.jxls</groupId>
<artifactId>jxls-core</artifactId>
<version>1.0.6</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Spring Boot Maven plugin. -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
서울부터 제주도까지의 데이터가 엑셀에 들어온 것을 확인할 수 있습니다.
'스프링' 카테고리의 다른 글
[JPA] 매핑 어노테이션 정리 (0) | 2022.06.20 |
---|---|
[JPA] 데이터베이스 스키마 자동 생성 - 속성 (0) | 2022.06.20 |
[Spring] maven repository 경로 변경 (0) | 2022.02.08 |
[Spring boot] 데이터베이스 멀티 커넥션 (0) | 2021.11.24 |
[Spring] json, xml 데이터 return 하기 (0) | 2021.11.23 |