pig join_results.pig

join_results.pig
-- Pair every (Publisher, Author) row of distinct_flat with the Books bag that
-- author_result holds for the same author. JOIN is an inner join by default,
-- so an author present in only one of the two relations yields no output row.
joined = JOIN distinct_flat BY Author, author_result BY Author;

-- Both join inputs carry an Author column, so every field is referenced with
-- its source relation (relation::field) and then given a short, unqualified
-- name for the downstream scripts.
filtered = FOREACH joined GENERATE
    distinct_flat::Publisher AS Publisher,
    distinct_flat::Author AS Author,
    author_result::Books AS Books;

pig group_by_year.pig

group_by_year.pig
-- Collect the rows of books into one bag per publication year. The grouping
-- key is exposed as `group`; the bag takes the name of the input relation.
groupByYear = GROUP books BY YearOfPublication;
DESCRIBE groupByYear;
-- Schema printed by DESCRIBE:
-- groupByYear: {group: int,books: {(ISBN: chararray,BookTitle: chararray,BookAuthor: chararray,YearOfPublication: int,Publisher: chararray)}}

pig group_author_publisher.pig

group_author_publisher.pig
-- Project books down to the two columns this script works with.
pub_auth = FOREACH books GENERATE Publisher, BookAuthor;

-- One row per publisher, carrying a bag of that publisher's authors with
-- duplicates removed (DISTINCT is applied inside the nested block, i.e. per
-- publisher, not across the whole relation).
distinct_authors = FOREACH (GROUP pub_auth BY Publisher) {
    da = DISTINCT pub_auth.BookAuthor;
    GENERATE group AS Publisher, da AS Author;
};

-- Unnest the author bag so the result is one (Publisher, Author) row per
-- distinct pairing.
distinct_flat = FOREACH distinct_authors GENERATE Publisher, FLATTEN(Author) AS Author;

pig final_results.pig

final_results.pig
-- Final shape: one row per publisher with a bag of (Author, Books) tuples
-- taken from the joined relation `filtered`.
result = FOREACH (GROUP filtered BY Publisher) {
    -- NOTE(review): the bag is sorted on Publisher, which is also the grouping
    -- key and therefore identical for every tuple in the bag, so this ORDER
    -- cannot change the tuple order. Sorting by Author may have been intended;
    -- left as-is pending confirmation.
    order_by_pub = ORDER filtered BY Publisher ASC;
    GENERATE group AS Publisher, order_by_pub.(Author, Books);
};

pig filter_books.pig

filter_books.pig
-- Keep only rows with a strictly positive publication year. The result is
-- bound back to the same alias, so later statements that read `books` see the
-- filtered relation. Rows whose year is null do not satisfy the comparison
-- and are dropped as well.
books = FILTER books BY (0 < YearOfPublication);

pig describe_books.pig

describe_books.pig
-- Print the schema of the books relation.
DESCRIBE books;
-- Schema printed by DESCRIBE:
-- books: {ISBN: chararray,BookTitle: chararray,BookAuthor: chararray,YearOfPublication: int,Publisher: chararray}

pig count_by_year.pig

count_by_year.pig
-- Number of books per publication year. groupByYear holds one row per year
-- with the matching rows in the bag named `books`; the bag is referenced by
-- name rather than by position ($1) so the statement reads on its own.
-- COUNT skips tuples whose first field is null; COUNT_STAR would include them.
countByYear = FOREACH groupByYear
    GENERATE group AS YearOfPublication, COUNT(books) AS BookCount;

DESCRIBE countByYear;
-- Schema printed by DESCRIBE (COUNT yields a long):
-- countByYear: {YearOfPublication: int,BookCount: long}

pig author_year.pig

author_year.pig
-- Produce one (BookAuthor, Year) row per book: group the books by author,
-- then unnest each author's bag of publication years again.
-- NOTE(review): this looks equivalent to a plain projection
-- (FOREACH books GENERATE BookAuthor, YearOfPublication AS Year) without the
-- extra GROUP; confirm before simplifying.
pivot = FOREACH (GROUP books BY BookAuthor)
    GENERATE group AS BookAuthor, FLATTEN(books.YearOfPublication) AS Year;

pig author_result_group.pig

author_result_group.pig
-- One row per author with a bag of (Year, count) tuples, ordered so the
-- years with the highest count come first.
author_result = FOREACH (GROUP with_count BY BookAuthor) {
    order_by_count = ORDER with_count BY count DESC;
    GENERATE group AS Author, order_by_count.(Year, count) AS Books;
};

DESCRIBE author_result;
-- Schema printed by DESCRIBE:
-- author_result: {Author: chararray,Books: {(group::Year: int,count: long)}}

pig author_count_by_year.pig

author_count_by_year.pig
-- Group the (BookAuthor, Year) rows of pivot on the composite key so that
-- each group holds the rows for one author in one year.
authorYearGroup = GROUP pivot BY (BookAuthor, Year);

-- FLATTEN(group) expands the composite key tuple back into two top-level
-- columns (group::BookAuthor, group::Year); COUNT gives the number of rows
-- in each group.
with_count = FOREACH authorYearGroup
    GENERATE FLATTEN(group), COUNT(pivot) AS count;

DESCRIBE with_count;
-- Schema printed by DESCRIBE:
-- with_count: {group::BookAuthor: chararray,group::Year: int, count: long}