/* weatherscrape.cpp
Copyright (c) 2008 Michael Zahniser
Please note the license terms (MIT license) at the end of this document.

Console program that prints the 10-day forecast from www.weather.com.
*/

#include <iostream>
#include <string>
#include <curl/curl.h>
#include <ctime>
#include <sstream>
#include <fstream>
#include <cstdlib>
#include <cstring>

using std::string;
using std::cout;
using std::endl;
using std::time;
using std::time_t;
using std::tm;
using std::localtime;
using std::mktime;
using std::ostringstream;
using std::ofstream;
using std::ostream;
using std::ios;
using std::memcpy;



// Growable byte buffer that accumulates the data libcurl hands to
// WriteFunction(). The object owns a heap-allocated array, which the
// destructor releases.
class Buffer {
public:
	// Constructor and destructor.
	Buffer();
	~Buffer();
	
	// Clear the buffer. This resets the size to zero but keeps whatever capacity it had.
	void Clear();
	// Get the number of bytes currently stored in the buffer.
	size_t Size() const;
	// Append the given data to the buffer, growing the storage if necessary.
	void Write(const void *data, int byteCount);
	// Get a pointer to the start of the buffer. Write() may reallocate the
	// storage, so do not hold on to the pointer across a call to Write().
	void *Data();
	const void *Data() const;
	
	// This function matches the format expected by libcurl. The "file" argument
	// must point to the Buffer object that should receive the data.
	static size_t WriteFunction(void *buffer, size_t elementSize, size_t elementCount, void *file);
	
	
private:
	// Copying is disabled. The destructor deletes the array, so a member-wise
	// copy would leave two objects deleting the same memory. These two members
	// are declared but intentionally never defined.
	Buffer(const Buffer &);
	Buffer &operator=(const Buffer &);
	
	
private:
	// The data.
	char *buffer;
	// The size of the data array.
	size_t capacity;
	// The number of bytes currently in use.
	size_t size;
};



// Initialize libcurl and create an easy handle. Returns NULL on failure.
CURL *OpenCURL();
// Fetch path into buffer, using curl. The buffer is cleared first.
// Returns true if libcurl reports that the transfer succeeded.
bool Fetch(const string &path, Buffer &buffer, CURL *curl);
// Read the forecast entries out of the page stored in buffer and write them
// to out, one tab-separated line per entry.
void Parse(const Buffer &buffer, ostream &out);
// Format the year, month and day of the given date as digits (YYYYMMDD).
string PrintDate(const tm &date);
// Get the text between prefix and suffix, searching forward from pos.
// pos is updated to refer to the position just after the suffix.
string Extract(const string &page, size_t &pos, const string &prefix, const string &suffix);
// Free the easy handle and libcurl's global resources. Does nothing if curl is NULL.
void CloseCURL(CURL *curl);



// Entry point.
// Arguments (both optional):
//   argv[1]  ZIP code to look up. Defaults to "02140".
//   argv[2]  File to append the forecast to. Defaults to standard output.
// Returns 0 if the forecast page was downloaded and handed to the parser.
// Returns 1 if libcurl could not be initialized, the output file could not
// be opened, or the download failed.
int main(int argc, char *argv[])
{
	string zipCode = "02140";
	if(argc > 1)
		zipCode = argv[1];
	
	string path = "http://www.weather.com/weather/tenday/" + zipCode;
	
	CURL *curl = OpenCURL();
	if(!curl)
	{
		std::cerr<<"Unable to initialize libcurl."<<endl;
		return 1;
	}
	
	ostream *out = &cout;
	ofstream fout;
	if(argc > 2)
	{
		// Append to the file, so that repeated runs build up a log.
		fout.open(argv[2], ios::app | ios::ate | ios::out);
		if(!fout)
		{
			// Without this check every line would be silently discarded.
			std::cerr<<"Unable to open "<<argv[2]<<" for writing."<<endl;
			CloseCURL(curl);
			return 1;
		}
		out = &fout;
	}
	
	Buffer buffer;
	const bool fetched = Fetch(path, buffer, curl);
	if(fetched)
		Parse(buffer, *out);
	else
		std::cerr<<"Unable to fetch "<<path<<endl;
	
	CloseCURL(curl);
	
	// Report a failed download to the caller instead of always claiming success.
	return fetched ? 0 : 1;
}



// OpenCURL:
// Set up libcurl's global state and create an easy handle.
// Returns the handle, or NULL if either step fails. When the handle cannot
// be created, the global state is torn down again before returning.
CURL *OpenCURL()
{
	// Global initialization must succeed before a handle can be requested.
	if(curl_global_init(CURL_GLOBAL_ALL) != CURLE_OK)
		return NULL;
	
	CURL *handle = curl_easy_init();
	if(handle)
		return handle;
	
	// No handle: undo the global initialization.
	curl_global_cleanup();
	return NULL;
}



bool Fetch(const string &path, Buffer &buffer, CURL *curl)
{
	buffer.Clear();

	curl_easy_setopt(curl, CURLOPT_URL, path.c_str());
	// Output data into the given buffer.
	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, &Buffer::WriteFunction);
	curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);
	
	// Get the image.
	CURLcode result = curl_easy_perform(curl);
	curl_easy_reset(curl);

	return (result == 0);	
}



// MadeProgress:
// Helper for Parse(). Check that an Extract() call moved the scan position
// forward and left it inside the page. A position that went backwards, stayed
// put or ended up past the end means a delimiter was not found.
static bool MadeProgress(const string &page, size_t before, size_t after)
{
	return (after > before && after <= page.length());
}



// Parse:
// Scan the downloaded forecast page held in buffer and write one line per
// forecast entry to out. Each line has five tab-separated fields: the date
// (YYYYMMDD), the forecast text, the high, the low, and the precipitation
// figure. The first entry is stamped with today's local date and each later
// entry with the following day. Parsing stops as soon as an expected marker
// is missing or the scan fails to move forward, so the loop always ends.
void Parse(const Buffer &buffer, ostream &out)
{
	string page(static_cast<const char *>(buffer.Data()), buffer.Size());
	
	time_t today = time(NULL);
	const tm *local = localtime(&today);
	if(!local)
		return;
	tm date = *local;
	// Only the calendar date matters. Use midday and let mktime() work out
	// whether DST applies, so stepping a day at a time cannot skip or repeat
	// a date when the clocks change.
	date.tm_hour = 12;
	date.tm_min = 0;
	date.tm_sec = 0;
	date.tm_isdst = -1;
	
	size_t pos = 0;
	while(true)
	{
		string timestamp = PrintDate(date);
		
		pos = page.find("class=\"tdForecast\"", pos);
		if(pos == string::npos)
			return;
		
		size_t mark = pos;
		string forecast = Extract(page, pos, "<br>", "</p>");
		if(!MadeProgress(page, mark, pos))
			return;
		
		pos = page.find("class=\"tdTemps\"", pos);
		if(pos == string::npos)
			return;
		
		mark = pos;
		string high = Extract(page, pos, "<strong>", "</strong>");
		if(!MadeProgress(page, mark, pos))
			return;
		// The high includes a trailing degree entity; strip it if present.
		if(high.length() >= 5 && high.compare(high.length() - 5, 5, "&deg;") == 0)
			high.erase(high.length() - 5);
		
		mark = pos;
		string low = Extract(page, pos, "<br>", "&deg;");
		if(!MadeProgress(page, mark, pos))
			return;
		
		pos = page.find("class=\"tdPrecip\"", pos);
		if(pos == string::npos)
			return;
		
		mark = pos;
		string precip = Extract(page, pos, "<p>", "&#37;");
		if(!MadeProgress(page, mark, pos))
			return;
		
		out<<timestamp<<'\t'<<forecast<<'\t'<<high<<'\t'<<low<<'\t'<<precip<<endl;
		
		// Add 1 to the day and make sure the date is still valid. mktime()
		// fills in tm_isdst, so reset it to "unknown" before every call.
		++date.tm_mday;
		date.tm_isdst = -1;
		mktime(&date);
	}
}



// PrintDate:
// Format the given date as the year followed by a two-digit month and a
// two-digit day, e.g. "20080105". tm counts years from 1900 and months
// from 0, so both are adjusted before printing.
string PrintDate(const tm &date)
{
	ostringstream stamp;
	// The fill character sticks; the field width must be set before each number.
	stamp.fill('0');
	stamp<<(date.tm_year + 1900);
	
	const int padded[2] = {date.tm_mon + 1, date.tm_mday};
	for(int i = 0; i < 2; ++i)
	{
		stamp.width(2);
		stamp<<padded[i];
	}
	
	return stamp.str();
}



// Extract:
// Return the text between the first occurrence of prefix at or after pos and
// the first occurrence of suffix that follows that prefix. On success, pos is
// moved to the position just after the suffix.
// If either delimiter cannot be found, an empty string is returned and pos is
// set to string::npos. Any later search from pos then fails cleanly instead of
// restarting from a wrapped-around position near the start of the page.
string Extract(const string &page, size_t &pos, const string &prefix, const string &suffix)
{
	size_t start = page.find(prefix, pos);
	if(start == string::npos)
	{
		pos = string::npos;
		return string();
	}
	// Skip over the prefix itself.
	start += prefix.length();
	
	size_t end = page.find(suffix, start);
	if(end == string::npos)
	{
		pos = string::npos;
		return string();
	}
	
	pos = end + suffix.length();
	
	return string(page, start, end - start);
}



// CloseCURL:
// Release the given easy handle and then libcurl's global resources.
// A NULL handle means there is nothing to clean up, so nothing is done.
void CloseCURL(CURL *curl)
{
	if(curl)
	{
		// Free the libcurl resources: first the handle, then the global state.
		curl_easy_cleanup(curl);
		curl_global_cleanup();
	}
}



// Constructor:
// Create an empty buffer with room for 1024 bytes.
// The initializers are listed in the order the members are declared, which
// is the order in which they are actually initialized.
Buffer::Buffer()
	: buffer(NULL), capacity(1024), size(0)
{
	this->buffer = new char[this->capacity];
}



// Destructor:
// Release the array that holds the data.
Buffer::~Buffer()
{
	delete [] this->buffer;
}



// Clear:
void Buffer::Clear()
{
	size = 0;
}



// Size:
// Get the number of bytes written to the buffer since the last Clear().
size_t Buffer::Size() const
{
	return size;
}



// Write:
// Append the given data to whatever is already in the buffer.
// data points to byteCount bytes to copy. A NULL pointer or a count that is
// zero or negative is ignored, so a bad count can never be turned into a huge
// unsigned length. If the storage has to grow and the allocation throws, the
// buffer is left exactly as it was.
void Buffer::Write(const void *data, int byteCount)
{
	if(!data || byteCount <= 0)
		return;
	const size_t count = static_cast<size_t>(byteCount);
	
	// size never exceeds capacity, so this subtraction cannot wrap around.
	if(count > capacity - size)
	{
		// If the capacity is exceeded, resize the buffer.
		// This is a generous estimate of how much capacity is needed, so that the
		// buffer will only rarely need to be resized.
		const size_t newCapacity = capacity * 2 + count;
		
		// Allocate before touching any member, so a failed allocation cannot
		// leave the capacity out of step with the real array.
		char *newBuffer = new char[newCapacity];
		// Copy whatever data is in the buffer already.
		memcpy(newBuffer, buffer, size);
		
		delete [] buffer;
		buffer = newBuffer;
		capacity = newCapacity;
	}
	
	// Append this data to the end of the buffer.
	memcpy(buffer + size, data, count);
	size += count;
}



// Data:
// Get a writable pointer to the first byte of the buffer.
void *Buffer::Data()
{
	void *start = buffer;
	return start;
}



// Data:
// Get a read-only pointer to the first byte of the buffer.
const void *Buffer::Data() const
{
	const void *start = buffer;
	return start;
}



// WriteFunction:
// This function matches the format expected by libcurl.
size_t Buffer::WriteFunction(void *buffer, size_t elementSize, size_t elementCount, void *file)
{
	if(!file)
		return 0;

	Buffer &out = *reinterpret_cast<Buffer *>(file);
	out.Write(buffer, static_cast<int>(elementSize * elementCount));

	return elementCount;
}



/* Copyright (c) 2008 Michael Zahniser

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE. */

