아래 터미널 정보 안내 사이트에서 서울특별시부터 제주도까지 지역별 터미널 목록을 크롤링한 뒤, 엑셀 파일로 내려받는 예제를 만들어 보겠습니다.

터미널 정보 > 터미널 안내 (bustago.or.kr)

 

터미널 정보 > 터미널 안내

 

www.bustago.or.kr

 

 

프로젝트 구조

 

MakeExcel.java


package com.crawlling.common;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import net.sf.jxls.exception.ParsePropertyException;
import net.sf.jxls.transformer.XLSTransformer;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Workbook;
import org.springframework.beans.factory.annotation.Value;

/**
 * Fills a jXLS template workbook with data and streams the result to the
 * browser as an Excel (.xlsx) attachment.
 */
public class MakeExcel {

	/** MIME type of an .xlsx workbook. */
	private static final String XLSX_CONTENT_TYPE =
			"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

	/** Extension the download name must end with. */
	private static final String XLSX_EXTENSION = ".xlsx";

	/** Directory, relative to the web application root, that holds the templates. */
	private static final String TEMPLATE_DIR = "/WEB-INF/excel";

	/**
	 * Renders {@code templateFile} with {@code bean} and writes the workbook to
	 * the HTTP response.
	 *
	 * @param request      current request; used to resolve the template directory
	 * @param response     response the workbook is written to
	 * @param bean         values exposed to the template
	 * @param fileName     download file name; ".xlsx" is appended only when the
	 *                     name does not already end with it
	 * @param templateFile template file name inside {@code /WEB-INF/excel}
	 * @param string       unused; kept so existing callers keep compiling
	 * @throws ParsePropertyException   if the template contains an invalid expression
	 * @throws InvalidFormatException   if the template is not a readable workbook
	 * @throws IllegalStateException    if the template directory cannot be resolved
	 * @throws java.io.UncheckedIOException if the template cannot be read or the
	 *                                  response cannot be written
	 */
	public void download(HttpServletRequest request, HttpServletResponse response,
			Map<String, Object> bean, String fileName, String templateFile, String string)
			throws ParsePropertyException, InvalidFormatException {

		String tempPath = request.getSession().getServletContext().getRealPath(TEMPLATE_DIR);
		if (tempPath == null) {
			// getRealPath returns null when the resource is not backed by the file system.
			throw new IllegalStateException(
					"Cannot resolve " + TEMPLATE_DIR + " to a file system path");
		}

		// "/" is accepted as a separator on every platform, Windows included,
		// so no per-environment branching is needed.
		String templatePath = tempPath + "/" + templateFile;

		// try-with-resources guarantees the template stream is closed even when
		// the transformation or the write fails.
		try (InputStream is = new BufferedInputStream(new FileInputStream(templatePath))) {
			Workbook workbook = new XLSTransformer().transformXLS(is, bean);

			// Headers are set only after the workbook was built, so a failed
			// transformation does not leave attachment headers on an error response.
			response.setContentType(XLSX_CONTENT_TYPE);
			response.setHeader("Content-Disposition",
					"attachment; filename=\"" + withXlsxExtension(fileName) + "\"");

			// The container owns the response stream, so it is flushed, not closed.
			OutputStream os = response.getOutputStream();
			workbook.write(os);
			os.flush();
		} catch (IOException e) {
			// Surface the failure instead of silently returning an empty download.
			throw new java.io.UncheckedIOException(
					"Failed to create Excel download from template " + templatePath, e);
		}
	}

	/** Returns {@code fileName} with ".xlsx" appended unless it already ends with it (case-insensitive). */
	private static String withXlsxExtension(String fileName) {
		int extLength = XLSX_EXTENSION.length();
		boolean hasExtension = fileName.regionMatches(
				true, fileName.length() - extLength, XLSX_EXTENSION, 0, extLength);
		return hasExtension ? fileName : fileName + XLSX_EXTENSION;
	}
}

 

Url.java


package com.crawlling.common;

/**
 * Request-mapping paths used by the controllers.
 *
 * <p>Pure constants holder; not meant to be instantiated.
 */
public final class Url {

	/** Prevents instantiation of this constants holder. */
	private Url() {
	}

	/** Paths of the crawling endpoints. */
	public static final class CRAWLING {

		/** Prevents instantiation of this constants holder. */
		private CRAWLING() {
		}

		/**
		 * Path of the terminal-site crawling endpoint.
		 * The misspelled name ("TEMINAL") is kept because callers reference it.
		 */
		public static final String TEMINAL = "/terminal";
	}
}

 

MainController.java


package com.crawlling.controller;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.springframework.stereotype.Controller;
import org.springframework.ui.Model;
import org.springframework.web.bind.annotation.GetMapping;
import com.crawlling.common.Url.CRAWLING;
import com.crawlling.common.MakeExcel;
import com.crawlling.service.CrawlingService;

/**
 * Web entry point that crawls the terminal list of every region and returns
 * it as an Excel download.
 */
@Controller
public class MainController {

	/**
	 * Regions to crawl, as {@code {terminalId, pageCount}} pairs.
	 *
	 * <p>{@code terminalId} is the region code sent to the site and
	 * {@code pageCount} is the number of result pages requested for it.
	 * NOTE(review): the page counts are hard-coded; if the site gains or loses
	 * pages these values have to be updated by hand.
	 */
	private static final int[][] REGION_PAGES = {
			{11, 4},  // Seoul
			{26, 4},  // Busan
			{27, 2},  // Daegu
			{28, 4},  // Incheon
			{29, 2},  // Gwangju
			{30, 2},  // Daejeon
			{31, 3},  // Ulsan
			{40, 1},  // Sejong
			{41, 46}, // Gyeonggi-do
			{42, 41}, // Gangwon-do
			{43, 13}, // Chungcheongbuk-do
			{44, 25}, // Chungcheongnam-do
			{45, 14}, // Jeollabuk-do
			{46, 42}, // Jeollanam-do
			{47, 38}, // Gyeongsangbuk-do
			{48, 48}, // Gyeongsangnam-do
			{49, 1},  // Jeju-do
	};

	/**
	 * Crawls the terminal site region by region and streams the collected rows
	 * to the client as an Excel file.
	 *
	 * @param request  current request, handed to the Excel writer
	 * @param response response the Excel file is written to
	 * @param model    unused
	 * @throws Exception if crawling or writing the workbook fails
	 */
	@GetMapping(CRAWLING.TEMINAL)
	public void crawlingTerminal(HttpServletRequest request, HttpServletResponse response, Model model) throws Exception{

		CrawlingService crawlingService = new CrawlingService();

		// Rows of all regions are accumulated in one list, in table order.
		List<Map<String, Object>> terminalList = new ArrayList<>();
		for (int[] region : REGION_PAGES) {
			terminalList = crawlingService.getTerminalDatas(terminalList, region[0], region[1]);
		}

		// Values handed to the Excel template; the crawled rows are stored under "dataList".
		Map<String, Object> beans = new HashMap<String, Object>();
		beans.put("dataList", terminalList);

		// The name is passed without an extension because MakeExcel.download
		// adds ".xlsx" itself; passing it here produced "....xlsx.xlsx".
		MakeExcel me = new MakeExcel();
		me.download(request, response, beans, "terminal_excelDownload", "terminal.xlsx", null);
	}

}

 

CrawlingService.java


package com.crawlling.service;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;

/**
 * Crawls the terminal list pages of bustago.or.kr.
 */
@Service
public class CrawlingService {

	/** Number of {@code <td>} cells a data row must have to be read. */
	private static final int EXPECTED_CELLS = 5;

	/**
	 * Fetches pages {@code 1..page} of one region and appends one map per table
	 * row to {@code terminalList}.
	 *
	 * <p>Each map holds the keys {@code boardNo} (Integer), {@code region},
	 * {@code terminalNm}, {@code address} and {@code phoneNo} (Strings), taken
	 * from the first five cells of the row. Rows with fewer than five cells are
	 * skipped.
	 *
	 * @param terminalList list the rows are appended to; must not be null
	 * @param teminalId    value sent as the {@code terminalId} query parameter
	 * @param page         number of pages to fetch; nothing is fetched when below 1
	 * @return the same {@code terminalList} instance, with the new rows appended
	 * @throws IOException           if a page cannot be fetched
	 * @throws NumberFormatException if the first cell of a data row is not an integer
	 */
	public List<Map<String, Object>> getTerminalDatas(List<Map<String, Object>> terminalList, int teminalId, int page) throws IOException {

		for (int pageNo = 1; pageNo <= page; pageNo++) {
			String url = "https://www.bustago.or.kr/newweb/kr/terminalInfo/ajax/terminallist_ajax.do?terminalAbbrNm=&terminalId="
					+ teminalId + "&terminalSearch=&page=" + pageNo;

			Document doc = Jsoup.connect(url).get();

			for (Element row : doc.select("table tbody tr")) {
				Elements cells = row.select("td");

				// A row without all five cells cannot be mapped; reading it by
				// index would throw IndexOutOfBoundsException.
				if (cells.size() < EXPECTED_CELLS) {
					continue;
				}

				Map<String, Object> map = new HashMap<String, Object>();
				map.put("boardNo", Integer.parseInt(cells.get(0).text()));
				map.put("region", cells.get(1).text());
				map.put("terminalNm", cells.get(2).text());
				map.put("address", cells.get(3).text());
				map.put("phoneNo", cells.get(4).text());
				terminalList.add(map);
			}
		}

		// Running total across all regions crawled into this list so far.
		System.out.println("size : " + terminalList.size());
		return terminalList;
	}

}

 

terminal.xlsx

 

pom.xml


<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the crawling example: a Spring Boot web application that uses jsoup and jXLS. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<!-- Dependencies declared below without a <version> rely on this parent to supply one. -->
	<parent>
		<groupId>org.springframework.boot</groupId>
		<artifactId>spring-boot-starter-parent</artifactId>
		<version>2.6.4</version>
		<relativePath/> <!-- lookup parent from repository -->
	</parent>
	<groupId>com</groupId>
	<artifactId>crawlling</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<name>crawlling</name>
	<description>Demo project for Spring Boot</description>
	<properties>
		<!-- Java language level the project is built for. -->
		<java.version>1.8</java.version>
	</properties>
	<dependencies>
		<!-- Spring Boot web starter; the controller's Spring MVC annotations come from here. -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-web</artifactId>
		</dependency>

		<!-- Spring Boot test starter, available on the test classpath only. -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-test</artifactId>
			<scope>test</scope>
		</dependency>
		
		<!-- jsoup: HTML fetching and parsing, used by CrawlingService. -->
		<!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
		<dependency>
		    <groupId>org.jsoup</groupId>
		    <artifactId>jsoup</artifactId>
		    <version>1.14.3</version>
		</dependency>
		
		<!-- jXLS 1.x: provides the XLSTransformer used by MakeExcel. -->
		<!-- NOTE(review): MakeExcel also imports Apache POI classes; presumably they arrive transitively through this artifact (confirm). -->
		<!-- https://mvnrepository.com/artifact/net.sf.jxls/jxls-core -->
		<dependency>
		    <groupId>net.sf.jxls</groupId>
		    <artifactId>jxls-core</artifactId>
		    <version>1.0.6</version>
		</dependency>
		
	</dependencies>

	<build>
		<plugins>
			<!-- Spring Boot Maven plugin. -->
			<plugin>
				<groupId>org.springframework.boot</groupId>
				<artifactId>spring-boot-maven-plugin</artifactId>
			</plugin>
		</plugins>
	</build>

</project>

 

 

서울부터 제주도까지의 터미널 데이터가 엑셀 파일에 들어온 것을 확인할 수 있습니다.

복사했습니다!